4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
47 static inline int cifs_convert_flags(unsigned int flags)
49 if ((flags & O_ACCMODE) == O_RDONLY)
51 else if ((flags & O_ACCMODE) == O_WRONLY)
53 else if ((flags & O_ACCMODE) == O_RDWR) {
54 /* GENERIC_ALL is too much permission to request
55 can cause unnecessary access denied on create */
56 /* return GENERIC_ALL; */
57 return (GENERIC_READ | GENERIC_WRITE);
60 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA | FILE_READ_DATA);
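/*
 * The fall-through return above is only reached when none of the
 * O_ACCMODE cases matched; it grants the individual metadata, EA and
 * data read/write/append rights rather than a GENERIC_* mask, which
 * avoids the over-broad access request mentioned in the comment above.
 */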
65 static u32 cifs_posix_convert_flags(unsigned int flags)
69 if ((flags & O_ACCMODE) == O_RDONLY)
70 posix_flags = SMB_O_RDONLY;
71 else if ((flags & O_ACCMODE) == O_WRONLY)
72 posix_flags = SMB_O_WRONLY;
73 else if ((flags & O_ACCMODE) == O_RDWR)
74 posix_flags = SMB_O_RDWR;
76 if (flags & O_CREAT) {
77 posix_flags |= SMB_O_CREAT;
79 posix_flags |= SMB_O_EXCL;
80 } else if (flags & O_EXCL)
81 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82 current->comm, current->tgid);
85 posix_flags |= SMB_O_TRUNC;
86 /* be safe and imply O_SYNC for O_DSYNC */
88 posix_flags |= SMB_O_SYNC;
89 if (flags & O_DIRECTORY)
90 posix_flags |= SMB_O_DIRECTORY;
91 if (flags & O_NOFOLLOW)
92 posix_flags |= SMB_O_NOFOLLOW;
94 posix_flags |= SMB_O_DIRECT;
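/*
 * The conversion above maps the VFS open flags onto the SMB_O_* flags
 * used by the CIFS POSIX extensions; O_DSYNC is conservatively widened
 * to SMB_O_SYNC, and O_EXCL is only honoured together with O_CREAT
 * (a bare O_EXCL is logged and ignored).
 */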
99 static inline int cifs_get_disposition(unsigned int flags)
101 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
103 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104 return FILE_OVERWRITE_IF;
105 else if ((flags & O_CREAT) == O_CREAT)
107 else if ((flags & O_TRUNC) == O_TRUNC)
108 return FILE_OVERWRITE;
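/*
 * This mirrors the open-flag mapping table documented in cifs_nt_open()
 * below: O_CREAT|O_EXCL -> FILE_CREATE, O_CREAT|O_TRUNC ->
 * FILE_OVERWRITE_IF, O_CREAT -> FILE_OPEN_IF, O_TRUNC -> FILE_OVERWRITE,
 * and anything else falls back to FILE_OPEN.
 */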
113 int cifs_posix_open(char *full_path, struct inode **pinode,
114 struct super_block *sb, int mode, unsigned int f_flags,
115 __u32 *poplock, __u16 *pnetfid, unsigned int xid)
118 FILE_UNIX_BASIC_INFO *presp_data;
119 __u32 posix_flags = 0;
120 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
121 struct cifs_fattr fattr;
122 struct tcon_link *tlink;
123 struct cifs_tcon *tcon;
125 cifs_dbg(FYI, "posix open %s\n", full_path);
127 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
128 if (presp_data == NULL)
131 tlink = cifs_sb_tlink(cifs_sb);
137 tcon = tlink_tcon(tlink);
138 mode &= ~current_umask();
140 posix_flags = cifs_posix_convert_flags(f_flags);
141 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
142 poplock, full_path, cifs_sb->local_nls,
143 cifs_remap(cifs_sb));
144 cifs_put_tlink(tlink);
149 if (presp_data->Type == cpu_to_le32(-1))
150 goto posix_open_ret; /* open ok, caller does qpathinfo */
153 goto posix_open_ret; /* caller does not need info */
155 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
157 /* get new inode and set it up */
158 if (*pinode == NULL) {
159 cifs_fill_uniqueid(sb, &fattr);
160 *pinode = cifs_iget(sb, &fattr);
166 cifs_fattr_to_inode(*pinode, &fattr);
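/*
 * On a successful POSIX open the returned FILE_UNIX_BASIC_INFO is
 * converted to a cifs_fattr; if the caller passed no inode a new one is
 * instantiated via cifs_iget(), otherwise the existing inode is simply
 * refreshed with cifs_fattr_to_inode().
 */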
175 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
176 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
177 struct cifs_fid *fid, unsigned int xid)
182 int create_options = CREATE_NOT_DIR;
184 struct TCP_Server_Info *server = tcon->ses->server;
185 struct cifs_open_parms oparms;
187 if (!server->ops->open)
190 desired_access = cifs_convert_flags(f_flags);
192 /*********************************************************************
193 * open flag mapping table:
195 * POSIX Flag CIFS Disposition
196 * ---------- ----------------
197 * O_CREAT FILE_OPEN_IF
198 * O_CREAT | O_EXCL FILE_CREATE
199 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
200 * O_TRUNC FILE_OVERWRITE
201 * none of the above FILE_OPEN
203 * Note that there is no direct match for the disposition
204 * FILE_SUPERSEDE (i.e. create whether or not the file exists).
205 * O_CREAT | O_TRUNC is similar, but it truncates the existing
206 * file rather than creating a new one as FILE_SUPERSEDE does
207 * (which uses the attributes / metadata passed in on the open call).
209 *? O_SYNC is a reasonable match to CIFS writethrough flag
210 *? and the read write flags match reasonably. O_LARGEFILE
211 *? is irrelevant because largefile support is always used
212 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
213 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
214 *********************************************************************/
216 disposition = cifs_get_disposition(f_flags);
218 /* BB pass O_SYNC flag through on file attributes .. BB */
220 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
224 if (backup_cred(cifs_sb))
225 create_options |= CREATE_OPEN_BACKUP_INTENT;
227 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
228 if (f_flags & O_SYNC)
229 create_options |= CREATE_WRITE_THROUGH;
231 if (f_flags & O_DIRECT)
232 create_options |= CREATE_NO_BUFFER;
235 oparms.cifs_sb = cifs_sb;
236 oparms.desired_access = desired_access;
237 oparms.create_options = create_options;
238 oparms.disposition = disposition;
239 oparms.path = full_path;
241 oparms.reconnect = false;
243 rc = server->ops->open(xid, &oparms, oplock, buf);
249 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
252 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
261 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
263 struct cifs_fid_locks *cur;
264 bool has_locks = false;
266 down_read(&cinode->lock_sem);
267 list_for_each_entry(cur, &cinode->llist, llist) {
268 if (!list_empty(&cur->locks)) {
273 up_read(&cinode->lock_sem);
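/*
 * cifs_has_mand_locks() walks every open fid's lock list under lock_sem
 * and reports whether any mandatory byte-range locks are cached; callers
 * use this to downgrade a read oplock to None (see cifs_new_fileinfo()
 * and cifs_reopen_file()).
 */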
277 struct cifsFileInfo *
278 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
279 struct tcon_link *tlink, __u32 oplock)
281 struct dentry *dentry = file_dentry(file);
282 struct inode *inode = d_inode(dentry);
283 struct cifsInodeInfo *cinode = CIFS_I(inode);
284 struct cifsFileInfo *cfile;
285 struct cifs_fid_locks *fdlocks;
286 struct cifs_tcon *tcon = tlink_tcon(tlink);
287 struct TCP_Server_Info *server = tcon->ses->server;
289 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
293 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
299 INIT_LIST_HEAD(&fdlocks->locks);
300 fdlocks->cfile = cfile;
301 cfile->llist = fdlocks;
302 down_write(&cinode->lock_sem);
303 list_add(&fdlocks->llist, &cinode->llist);
304 up_write(&cinode->lock_sem);
307 cfile->pid = current->tgid;
308 cfile->uid = current_fsuid();
309 cfile->dentry = dget(dentry);
310 cfile->f_flags = file->f_flags;
311 cfile->invalidHandle = false;
312 cfile->tlink = cifs_get_tlink(tlink);
313 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
314 mutex_init(&cfile->fh_mutex);
315 spin_lock_init(&cfile->file_info_lock);
317 cifs_sb_active(inode->i_sb);
320 * If the server returned a read oplock and we have mandatory brlocks,
321 * set oplock level to None.
323 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
324 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
328 spin_lock(&tcon->open_file_lock);
329 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
330 oplock = fid->pending_open->oplock;
331 list_del(&fid->pending_open->olist);
333 fid->purge_cache = false;
334 server->ops->set_fid(cfile, fid, oplock);
336 list_add(&cfile->tlist, &tcon->openFileList);
338 /* if this is a readable file instance, put it first in the list */
339 if (file->f_mode & FMODE_READ)
340 list_add(&cfile->flist, &cinode->openFileList);
342 list_add_tail(&cfile->flist, &cinode->openFileList);
343 spin_unlock(&tcon->open_file_lock);
345 if (fid->purge_cache)
346 cifs_zap_mapping(inode);
348 file->private_data = cfile;
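/*
 * The new cifsFileInfo is linked onto both the tcon's openFileList and
 * the inode's openFileList (readable handles are kept at the front), any
 * oplock recorded against the pending open is honoured, and the VFS file
 * is tied to it via file->private_data.
 */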
352 struct cifsFileInfo *
353 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
355 spin_lock(&cifs_file->file_info_lock);
356 cifsFileInfo_get_locked(cifs_file);
357 spin_unlock(&cifs_file->file_info_lock);
362 * Release a reference on the file private data. This may involve closing
363 * the filehandle out on the server. Must be called without holding
364 * tcon->open_file_lock and cifs_file->file_info_lock.
366 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
368 struct inode *inode = d_inode(cifs_file->dentry);
369 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
370 struct TCP_Server_Info *server = tcon->ses->server;
371 struct cifsInodeInfo *cifsi = CIFS_I(inode);
372 struct super_block *sb = inode->i_sb;
373 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
374 struct cifsLockInfo *li, *tmp;
376 struct cifs_pending_open open;
377 bool oplock_break_cancelled;
379 spin_lock(&tcon->open_file_lock);
381 spin_lock(&cifs_file->file_info_lock);
382 if (--cifs_file->count > 0) {
383 spin_unlock(&cifs_file->file_info_lock);
384 spin_unlock(&tcon->open_file_lock);
387 spin_unlock(&cifs_file->file_info_lock);
389 if (server->ops->get_lease_key)
390 server->ops->get_lease_key(inode, &fid);
392 /* store open in pending opens to make sure we don't miss lease break */
393 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
395 /* remove it from the lists */
396 list_del(&cifs_file->flist);
397 list_del(&cifs_file->tlist);
399 if (list_empty(&cifsi->openFileList)) {
400 cifs_dbg(FYI, "closing last open instance for inode %p\n",
401 d_inode(cifs_file->dentry));
403 * In strict cache mode we need to invalidate the mapping on the last
404 * close because it may cause an error when we open this file
405 * again and get at least a level II oplock.
407 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
408 set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
409 cifs_set_oplock_level(cifsi, 0);
412 spin_unlock(&tcon->open_file_lock);
414 oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
416 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
417 struct TCP_Server_Info *server = tcon->ses->server;
421 if (server->ops->close)
422 server->ops->close(xid, tcon, &cifs_file->fid);
426 if (oplock_break_cancelled)
427 cifs_done_oplock_break(cifsi);
429 cifs_del_pending_open(&open);
432 * Delete any outstanding lock records. We'll lose them when the file is closed anyway.
435 down_write(&cifsi->lock_sem);
436 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
437 list_del(&li->llist);
438 cifs_del_lock_waiters(li);
441 list_del(&cifs_file->llist->llist);
442 kfree(cifs_file->llist);
443 up_write(&cifsi->lock_sem);
445 cifs_put_tlink(cifs_file->tlink);
446 dput(cifs_file->dentry);
447 cifs_sb_deactive(sb);
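/*
 * Teardown order for the last reference: close the handle on the server
 * (unless the tcon needs reconnect or the handle is already invalid),
 * finish any cancelled oplock break, drop the pending open, free the
 * remaining byte-range lock records under lock_sem, and finally release
 * the tlink, dentry and superblock references taken at open time.
 */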
451 int cifs_open(struct inode *inode, struct file *file)
457 struct cifs_sb_info *cifs_sb;
458 struct TCP_Server_Info *server;
459 struct cifs_tcon *tcon;
460 struct tcon_link *tlink;
461 struct cifsFileInfo *cfile = NULL;
462 char *full_path = NULL;
463 bool posix_open_ok = false;
465 struct cifs_pending_open open;
469 cifs_sb = CIFS_SB(inode->i_sb);
470 tlink = cifs_sb_tlink(cifs_sb);
473 return PTR_ERR(tlink);
475 tcon = tlink_tcon(tlink);
476 server = tcon->ses->server;
478 full_path = build_path_from_dentry(file_dentry(file));
479 if (full_path == NULL) {
484 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
485 inode, file->f_flags, full_path);
487 if (file->f_flags & O_DIRECT &&
488 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
489 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
490 file->f_op = &cifs_file_direct_nobrl_ops;
492 file->f_op = &cifs_file_direct_ops;
500 if (!tcon->broken_posix_open && tcon->unix_ext &&
501 cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
502 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
503 /* can not refresh inode info since size could be stale */
504 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
505 cifs_sb->mnt_file_mode /* ignored */,
506 file->f_flags, &oplock, &fid.netfid, xid);
508 cifs_dbg(FYI, "posix open succeeded\n");
509 posix_open_ok = true;
510 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
511 if (tcon->ses->serverNOS)
512 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
513 tcon->ses->serverName,
514 tcon->ses->serverNOS);
515 tcon->broken_posix_open = true;
516 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
517 (rc != -EOPNOTSUPP)) /* path not found or net err */
520 * Else fall through to retry the open the old way on network i/o or DFS errors.
525 if (server->ops->get_lease_key)
526 server->ops->get_lease_key(inode, &fid);
528 cifs_add_pending_open(&fid, tlink, &open);
530 if (!posix_open_ok) {
531 if (server->ops->get_lease_key)
532 server->ops->get_lease_key(inode, &fid);
534 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
535 file->f_flags, &oplock, &fid, xid);
537 cifs_del_pending_open(&open);
542 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
544 if (server->ops->close)
545 server->ops->close(xid, tcon, &fid);
546 cifs_del_pending_open(&open);
551 cifs_fscache_set_inode_cookie(inode, file);
553 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
555 * Time to set the mode, which we could not set earlier due to
556 * problems creating new read-only files.
558 struct cifs_unix_set_info_args args = {
559 .mode = inode->i_mode,
560 .uid = INVALID_UID, /* no change */
561 .gid = INVALID_GID, /* no change */
562 .ctime = NO_CHANGE_64,
563 .atime = NO_CHANGE_64,
564 .mtime = NO_CHANGE_64,
567 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
574 cifs_put_tlink(tlink);
578 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
581 * Try to reacquire byte range locks that were released when the
582 * session to the server was lost.
585 cifs_relock_file(struct cifsFileInfo *cfile)
587 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
588 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
589 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
592 down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
593 if (cinode->can_cache_brlcks) {
594 /* can cache locks - no need to relock */
595 up_read(&cinode->lock_sem);
599 if (cap_unix(tcon->ses) &&
600 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
601 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
602 rc = cifs_push_posix_locks(cfile);
604 rc = tcon->ses->server->ops->push_mand_locks(cfile);
606 up_read(&cinode->lock_sem);
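/*
 * Locks are re-pushed as POSIX (fcntl) locks when the server advertises
 * CIFS_UNIX_FCNTL_CAP and posix brlocks are not disabled by the mount
 * options; otherwise the cached mandatory locks are re-sent through the
 * server's push_mand_locks operation.
 */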
611 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
616 struct cifs_sb_info *cifs_sb;
617 struct cifs_tcon *tcon;
618 struct TCP_Server_Info *server;
619 struct cifsInodeInfo *cinode;
621 char *full_path = NULL;
623 int disposition = FILE_OPEN;
624 int create_options = CREATE_NOT_DIR;
625 struct cifs_open_parms oparms;
628 mutex_lock(&cfile->fh_mutex);
629 if (!cfile->invalidHandle) {
630 mutex_unlock(&cfile->fh_mutex);
636 inode = d_inode(cfile->dentry);
637 cifs_sb = CIFS_SB(inode->i_sb);
638 tcon = tlink_tcon(cfile->tlink);
639 server = tcon->ses->server;
642 * We cannot grab the rename sem here because various ops, including
643 * those that already hold it, can end up causing writepage to be
644 * called; if the server was down, that means we end up here, and we
645 * can never tell whether the caller already holds the rename_sem.
647 full_path = build_path_from_dentry(cfile->dentry);
648 if (full_path == NULL) {
650 mutex_unlock(&cfile->fh_mutex);
655 cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
656 inode, cfile->f_flags, full_path);
658 if (tcon->ses->server->oplocks)
663 if (tcon->unix_ext && cap_unix(tcon->ses) &&
664 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
665 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
667 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
668 * original open. Must mask them off for a reopen.
670 unsigned int oflags = cfile->f_flags &
671 ~(O_CREAT | O_EXCL | O_TRUNC);
673 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
674 cifs_sb->mnt_file_mode /* ignored */,
675 oflags, &oplock, &cfile->fid.netfid, xid);
677 cifs_dbg(FYI, "posix reopen succeeded\n");
678 oparms.reconnect = true;
682 * Fall through to retry the open the old way on errors; especially
683 * in the reconnect path it is important to retry hard.
687 desired_access = cifs_convert_flags(cfile->f_flags);
689 if (backup_cred(cifs_sb))
690 create_options |= CREATE_OPEN_BACKUP_INTENT;
692 if (server->ops->get_lease_key)
693 server->ops->get_lease_key(inode, &cfile->fid);
696 oparms.cifs_sb = cifs_sb;
697 oparms.desired_access = desired_access;
698 oparms.create_options = create_options;
699 oparms.disposition = disposition;
700 oparms.path = full_path;
701 oparms.fid = &cfile->fid;
702 oparms.reconnect = true;
705 * We cannot refresh the inode by passing in a file_info buf to be
706 * returned by ops->open and then calling get_inode_info with that buf,
707 * since the file might have write-behind data that needs to be flushed
708 * and the server's version of the file size can be stale. If we knew
709 * for sure that the inode was not dirty locally we could do this.
711 rc = server->ops->open(xid, &oparms, &oplock, NULL);
712 if (rc == -ENOENT && oparms.reconnect == false) {
713 /* durable handle timeout is expired - open the file again */
714 rc = server->ops->open(xid, &oparms, &oplock, NULL);
715 /* indicate that we need to relock the file */
716 oparms.reconnect = true;
720 mutex_unlock(&cfile->fh_mutex);
721 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
722 cifs_dbg(FYI, "oplock: %d\n", oplock);
723 goto reopen_error_exit;
727 cfile->invalidHandle = false;
728 mutex_unlock(&cfile->fh_mutex);
729 cinode = CIFS_I(inode);
732 rc = filemap_write_and_wait(inode->i_mapping);
733 mapping_set_error(inode->i_mapping, rc);
736 rc = cifs_get_inode_info_unix(&inode, full_path,
739 rc = cifs_get_inode_info(&inode, full_path, NULL,
740 inode->i_sb, xid, NULL);
743 * Else we are already writing out data to the server and could deadlock
744 * if we tried to flush it; and since we do not know whether we have data
745 * that would invalidate the current end of file on the server, we cannot
746 * go to the server to get the new inode info.
750 * If the server returned a read oplock and we have mandatory brlocks,
751 * set oplock level to None.
753 if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
754 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
758 server->ops->set_fid(cfile, &cfile->fid, oplock);
759 if (oparms.reconnect)
760 cifs_relock_file(cfile);
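/*
 * On a successful reopen the handle is marked valid again, cached data
 * is flushed and the inode refreshed only when can_flush is set, the fid
 * is re-installed via set_fid(), and byte-range locks are reacquired
 * with cifs_relock_file() when this was a reconnect.
 */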
768 int cifs_close(struct inode *inode, struct file *file)
770 if (file->private_data != NULL) {
771 cifsFileInfo_put(file->private_data);
772 file->private_data = NULL;
775 /* return code from the ->release op is always ignored */
780 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
782 struct cifsFileInfo *open_file;
783 struct list_head *tmp;
784 struct list_head *tmp1;
785 struct list_head tmp_list;
787 if (!tcon->use_persistent || !tcon->need_reopen_files)
790 tcon->need_reopen_files = false;
792 cifs_dbg(FYI, "Reopen persistent handles\n");
793 INIT_LIST_HEAD(&tmp_list);
795 /* list all files open on tree connection, reopen resilient handles */
796 spin_lock(&tcon->open_file_lock);
797 list_for_each(tmp, &tcon->openFileList) {
798 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
799 if (!open_file->invalidHandle)
801 cifsFileInfo_get(open_file);
802 list_add_tail(&open_file->rlist, &tmp_list);
804 spin_unlock(&tcon->open_file_lock);
806 list_for_each_safe(tmp, tmp1, &tmp_list) {
807 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
808 if (cifs_reopen_file(open_file, false /* do not flush */))
809 tcon->need_reopen_files = true;
810 list_del_init(&open_file->rlist);
811 cifsFileInfo_put(open_file);
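/*
 * Handles are collected on a private list under open_file_lock with an
 * extra reference so that cifs_reopen_file() can run without the
 * spinlock held; any reopen failure sets need_reopen_files again so a
 * later call will retry.
 */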
815 int cifs_closedir(struct inode *inode, struct file *file)
819 struct cifsFileInfo *cfile = file->private_data;
820 struct cifs_tcon *tcon;
821 struct TCP_Server_Info *server;
824 cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
830 tcon = tlink_tcon(cfile->tlink);
831 server = tcon->ses->server;
833 cifs_dbg(FYI, "Freeing private data in close dir\n");
834 spin_lock(&cfile->file_info_lock);
835 if (server->ops->dir_needs_close(cfile)) {
836 cfile->invalidHandle = true;
837 spin_unlock(&cfile->file_info_lock);
838 if (server->ops->close_dir)
839 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
842 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
843 /* not much we can do if it fails anyway, ignore rc */
846 spin_unlock(&cfile->file_info_lock);
848 buf = cfile->srch_inf.ntwrk_buf_start;
850 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
851 cfile->srch_inf.ntwrk_buf_start = NULL;
852 if (cfile->srch_inf.smallBuf)
853 cifs_small_buf_release(buf);
855 cifs_buf_release(buf);
858 cifs_put_tlink(cfile->tlink);
859 kfree(file->private_data);
860 file->private_data = NULL;
861 /* BB can we lock the filestruct while this is going on? */
866 static struct cifsLockInfo *
867 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
869 struct cifsLockInfo *lock =
870 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
873 lock->offset = offset;
874 lock->length = length;
876 lock->pid = current->tgid;
877 INIT_LIST_HEAD(&lock->blist);
878 init_waitqueue_head(&lock->block_q);
883 cifs_del_lock_waiters(struct cifsLockInfo *lock)
885 struct cifsLockInfo *li, *tmp;
886 list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
887 list_del_init(&li->blist);
888 wake_up(&li->block_q);
892 #define CIFS_LOCK_OP 0
893 #define CIFS_READ_OP 1
894 #define CIFS_WRITE_OP 2
896 /* @rw_check : CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
898 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
899 __u64 length, __u8 type, struct cifsFileInfo *cfile,
900 struct cifsLockInfo **conf_lock, int rw_check)
902 struct cifsLockInfo *li;
903 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
904 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
906 list_for_each_entry(li, &fdlocks->locks, llist) {
907 if (offset + length <= li->offset ||
908 offset >= li->offset + li->length)
910 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
911 server->ops->compare_fids(cfile, cur_cfile)) {
912 /* shared lock prevents write op through the same fid */
913 if (!(li->type & server->vals->shared_lock_type) ||
914 rw_check != CIFS_WRITE_OP)
917 if ((type & server->vals->shared_lock_type) &&
918 ((server->ops->compare_fids(cfile, cur_cfile) &&
919 current->tgid == li->pid) || type == li->type))
929 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
930 __u8 type, struct cifsLockInfo **conf_lock,
934 struct cifs_fid_locks *cur;
935 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
937 list_for_each_entry(cur, &cinode->llist, llist) {
938 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
939 cfile, conf_lock, rw_check);
948 * Check if there is another lock that prevents us from setting the lock
949 * (mandatory style). If such a lock exists, update the flock structure
950 * with its properties. Otherwise, set the flock type to F_UNLCK if we can
951 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
952 * need to send a request to the server or 1 otherwise.
955 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
956 __u8 type, struct file_lock *flock)
959 struct cifsLockInfo *conf_lock;
960 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
961 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
964 down_read(&cinode->lock_sem);
966 exist = cifs_find_lock_conflict(cfile, offset, length, type,
967 &conf_lock, CIFS_LOCK_OP);
969 flock->fl_start = conf_lock->offset;
970 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
971 flock->fl_pid = conf_lock->pid;
972 if (conf_lock->type & server->vals->shared_lock_type)
973 flock->fl_type = F_RDLCK;
975 flock->fl_type = F_WRLCK;
976 } else if (!cinode->can_cache_brlcks)
979 flock->fl_type = F_UNLCK;
981 up_read(&cinode->lock_sem);
986 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
988 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
989 down_write(&cinode->lock_sem);
990 list_add_tail(&lock->llist, &cfile->llist->locks);
991 up_write(&cinode->lock_sem);
995 * Set the byte-range lock (mandatory style). Returns:
996 * 1) 0, if we set the lock and don't need to send a request to the server;
997 * 2) 1, if no locks prevent us but we need to send a request to the server;
998 * 3) -EACCES, if there is a lock that prevents us and wait is false.
1001 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1004 struct cifsLockInfo *conf_lock;
1005 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1011 down_write(&cinode->lock_sem);
1013 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1014 lock->type, &conf_lock, CIFS_LOCK_OP);
1015 if (!exist && cinode->can_cache_brlcks) {
1016 list_add_tail(&lock->llist, &cfile->llist->locks);
1017 up_write(&cinode->lock_sem);
1026 list_add_tail(&lock->blist, &conf_lock->blist);
1027 up_write(&cinode->lock_sem);
1028 rc = wait_event_interruptible(lock->block_q,
1029 (lock->blist.prev == &lock->blist) &&
1030 (lock->blist.next == &lock->blist));
1033 down_write(&cinode->lock_sem);
1034 list_del_init(&lock->blist);
1037 up_write(&cinode->lock_sem);
1042 * Check if there is another lock that prevents us from setting the lock
1043 * (posix style). If such a lock exists, update the flock structure with
1044 * its properties. Otherwise, set the flock type to F_UNLCK if we can
1045 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
1046 * need to send a request to the server or 1 otherwise.
1049 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1052 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1053 unsigned char saved_type = flock->fl_type;
1055 if ((flock->fl_flags & FL_POSIX) == 0)
1058 down_read(&cinode->lock_sem);
1059 posix_test_lock(file, flock);
1061 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1062 flock->fl_type = saved_type;
1066 up_read(&cinode->lock_sem);
1071 * Set the byte-range lock (posix style). Returns:
1072 * 1) 0, if we set the lock and don't need to send a request to the server;
1073 * 2) 1, if we need to send a request to the server;
1074 * 3) <0, if an error occurs while setting the lock.
1077 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1079 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1082 if ((flock->fl_flags & FL_POSIX) == 0)
1086 down_write(&cinode->lock_sem);
1087 if (!cinode->can_cache_brlcks) {
1088 up_write(&cinode->lock_sem);
1092 rc = posix_lock_file(file, flock, NULL);
1093 up_write(&cinode->lock_sem);
1094 if (rc == FILE_LOCK_DEFERRED) {
1095 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1098 posix_unblock_lock(flock);
1104 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1107 int rc = 0, stored_rc;
1108 struct cifsLockInfo *li, *tmp;
1109 struct cifs_tcon *tcon;
1110 unsigned int num, max_num, max_buf;
1111 LOCKING_ANDX_RANGE *buf, *cur;
1112 static const int types[] = {
1113 LOCKING_ANDX_LARGE_FILES,
1114 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1119 tcon = tlink_tcon(cfile->tlink);
1122 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1123 * and check it for zero before using.
1125 max_buf = tcon->ses->server->maxBuf;
1131 max_num = (max_buf - sizeof(struct smb_hdr)) /
1132 sizeof(LOCKING_ANDX_RANGE);
1133 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1139 for (i = 0; i < 2; i++) {
1142 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1143 if (li->type != types[i])
1145 cur->Pid = cpu_to_le16(li->pid);
1146 cur->LengthLow = cpu_to_le32((u32)li->length);
1147 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1148 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1149 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1150 if (++num == max_num) {
1151 stored_rc = cifs_lockv(xid, tcon,
1153 (__u8)li->type, 0, num,
1164 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1165 (__u8)types[i], 0, num, buf);
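/*
 * Cached lock ranges are packed into LOCKING_ANDX_RANGE arrays of at
 * most max_num entries (bounded by the server's maxBuf) and flushed to
 * the server with cifs_lockv(); exclusive and shared lock types are sent
 * in separate passes over the list.
 */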
1177 hash_lockowner(fl_owner_t owner)
1179 return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1182 struct lock_to_push {
1183 struct list_head llist;
1192 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1194 struct inode *inode = d_inode(cfile->dentry);
1195 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1196 struct file_lock *flock;
1197 struct file_lock_context *flctx = inode->i_flctx;
1198 unsigned int count = 0, i;
1199 int rc = 0, xid, type;
1200 struct list_head locks_to_send, *el;
1201 struct lock_to_push *lck, *tmp;
1209 spin_lock(&flctx->flc_lock);
1210 list_for_each(el, &flctx->flc_posix) {
1213 spin_unlock(&flctx->flc_lock);
1215 INIT_LIST_HEAD(&locks_to_send);
1218 * Allocating count locks is enough because no FL_POSIX locks can be
1219 * added to the list while we are holding cinode->lock_sem that
1220 * protects locking operations of this inode.
1222 for (i = 0; i < count; i++) {
1223 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1228 list_add_tail(&lck->llist, &locks_to_send);
1231 el = locks_to_send.next;
1232 spin_lock(&flctx->flc_lock);
1233 list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1234 if (el == &locks_to_send) {
1236 * The list ended. We don't have enough allocated
1237 * structures - something is really wrong.
1239 cifs_dbg(VFS, "Can't push all brlocks!\n");
1242 length = 1 + flock->fl_end - flock->fl_start;
1243 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1247 lck = list_entry(el, struct lock_to_push, llist);
1248 lck->pid = hash_lockowner(flock->fl_owner);
1249 lck->netfid = cfile->fid.netfid;
1250 lck->length = length;
1252 lck->offset = flock->fl_start;
1254 spin_unlock(&flctx->flc_lock);
1256 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1259 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1260 lck->offset, lck->length, NULL,
1264 list_del(&lck->llist);
1272 list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1273 list_del(&lck->llist);
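/*
 * The first loop above pushes each preallocated lock_to_push entry to
 * the server with CIFSSMBPosixLock() and frees it; the second loop is
 * the error path that simply discards the entries that were never
 * filled in (e.g. after the "Can't push all brlocks!" case above).
 */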
1280 cifs_push_locks(struct cifsFileInfo *cfile)
1282 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1283 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1284 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1287 /* we are going to update can_cache_brlcks here - need a write access */
1288 down_write(&cinode->lock_sem);
1289 if (!cinode->can_cache_brlcks) {
1290 up_write(&cinode->lock_sem);
1294 if (cap_unix(tcon->ses) &&
1295 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1296 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1297 rc = cifs_push_posix_locks(cfile);
1299 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1301 cinode->can_cache_brlcks = false;
1302 up_write(&cinode->lock_sem);
1307 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1308 bool *wait_flag, struct TCP_Server_Info *server)
1310 if (flock->fl_flags & FL_POSIX)
1311 cifs_dbg(FYI, "Posix\n");
1312 if (flock->fl_flags & FL_FLOCK)
1313 cifs_dbg(FYI, "Flock\n");
1314 if (flock->fl_flags & FL_SLEEP) {
1315 cifs_dbg(FYI, "Blocking lock\n");
1318 if (flock->fl_flags & FL_ACCESS)
1319 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1320 if (flock->fl_flags & FL_LEASE)
1321 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1322 if (flock->fl_flags &
1323 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1324 FL_ACCESS | FL_LEASE | FL_CLOSE)))
1325 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1327 *type = server->vals->large_lock_type;
1328 if (flock->fl_type == F_WRLCK) {
1329 cifs_dbg(FYI, "F_WRLCK\n");
1330 *type |= server->vals->exclusive_lock_type;
1332 } else if (flock->fl_type == F_UNLCK) {
1333 cifs_dbg(FYI, "F_UNLCK\n");
1334 *type |= server->vals->unlock_lock_type;
1336 /* Check if unlock includes more than one lock range */
1337 } else if (flock->fl_type == F_RDLCK) {
1338 cifs_dbg(FYI, "F_RDLCK\n");
1339 *type |= server->vals->shared_lock_type;
1341 } else if (flock->fl_type == F_EXLCK) {
1342 cifs_dbg(FYI, "F_EXLCK\n");
1343 *type |= server->vals->exclusive_lock_type;
1345 } else if (flock->fl_type == F_SHLCK) {
1346 cifs_dbg(FYI, "F_SHLCK\n");
1347 *type |= server->vals->shared_lock_type;
1350 cifs_dbg(FYI, "Unknown type of lock\n");
1354 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1355 bool wait_flag, bool posix_lck, unsigned int xid)
1358 __u64 length = 1 + flock->fl_end - flock->fl_start;
1359 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1360 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1361 struct TCP_Server_Info *server = tcon->ses->server;
1362 __u16 netfid = cfile->fid.netfid;
1365 int posix_lock_type;
1367 rc = cifs_posix_lock_test(file, flock);
1371 if (type & server->vals->shared_lock_type)
1372 posix_lock_type = CIFS_RDLCK;
1374 posix_lock_type = CIFS_WRLCK;
1375 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1376 hash_lockowner(flock->fl_owner),
1377 flock->fl_start, length, flock,
1378 posix_lock_type, wait_flag);
1382 rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1386 /* BB we could chain these into one lock request BB */
1387 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1390 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1392 flock->fl_type = F_UNLCK;
1394 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1399 if (type & server->vals->shared_lock_type) {
1400 flock->fl_type = F_WRLCK;
1404 type &= ~server->vals->exclusive_lock_type;
1406 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1407 type | server->vals->shared_lock_type,
1410 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1411 type | server->vals->shared_lock_type, 0, 1, false);
1412 flock->fl_type = F_RDLCK;
1414 cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1417 flock->fl_type = F_WRLCK;
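/*
 * When the lock cannot be resolved locally, the range is probed by
 * attempting to set it on the server and, if that succeeds, unlocking it
 * again; flock->fl_type is then updated to reflect whether a conflicting
 * lock exists on the server.
 */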
1423 cifs_move_llist(struct list_head *source, struct list_head *dest)
1425 struct list_head *li, *tmp;
1426 list_for_each_safe(li, tmp, source)
1427 list_move(li, dest);
1431 cifs_free_llist(struct list_head *llist)
1433 struct cifsLockInfo *li, *tmp;
1434 list_for_each_entry_safe(li, tmp, llist, llist) {
1435 cifs_del_lock_waiters(li);
1436 list_del(&li->llist);
1442 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1445 int rc = 0, stored_rc;
1446 static const int types[] = {
1447 LOCKING_ANDX_LARGE_FILES,
1448 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1451 unsigned int max_num, num, max_buf;
1452 LOCKING_ANDX_RANGE *buf, *cur;
1453 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1454 struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1455 struct cifsLockInfo *li, *tmp;
1456 __u64 length = 1 + flock->fl_end - flock->fl_start;
1457 struct list_head tmp_llist;
1459 INIT_LIST_HEAD(&tmp_llist);
1462 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1463 * and check it for zero before using.
1465 max_buf = tcon->ses->server->maxBuf;
1469 max_num = (max_buf - sizeof(struct smb_hdr)) /
1470 sizeof(LOCKING_ANDX_RANGE);
1471 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1475 down_write(&cinode->lock_sem);
1476 for (i = 0; i < 2; i++) {
1479 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1480 if (flock->fl_start > li->offset ||
1481 (flock->fl_start + length) <
1482 (li->offset + li->length))
1484 if (current->tgid != li->pid)
1486 if (types[i] != li->type)
1488 if (cinode->can_cache_brlcks) {
1490 * We can cache brlock requests - simply remove
1491 * a lock from the file's list.
1493 list_del(&li->llist);
1494 cifs_del_lock_waiters(li);
1498 cur->Pid = cpu_to_le16(li->pid);
1499 cur->LengthLow = cpu_to_le32((u32)li->length);
1500 cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1501 cur->OffsetLow = cpu_to_le32((u32)li->offset);
1502 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1504 * We need to save a lock here to let us add it again to
1505 * the file's list if the unlock range request fails on the server.
1508 list_move(&li->llist, &tmp_llist);
1509 if (++num == max_num) {
1510 stored_rc = cifs_lockv(xid, tcon,
1512 li->type, num, 0, buf);
1515 * We failed on the unlock range
1516 * request - add all locks from the tmp
1517 * list to the head of the file's list.
1519 cifs_move_llist(&tmp_llist,
1520 &cfile->llist->locks);
1524 * The unlock range request succeeded -
1525 * free the tmp list.
1527 cifs_free_llist(&tmp_llist);
1534 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1535 types[i], num, 0, buf);
1537 cifs_move_llist(&tmp_llist,
1538 &cfile->llist->locks);
1541 cifs_free_llist(&tmp_llist);
1545 up_write(&cinode->lock_sem);
1551 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1552 bool wait_flag, bool posix_lck, int lock, int unlock,
1556 __u64 length = 1 + flock->fl_end - flock->fl_start;
1557 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1558 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1559 struct TCP_Server_Info *server = tcon->ses->server;
1560 struct inode *inode = d_inode(cfile->dentry);
1563 int posix_lock_type;
1565 rc = cifs_posix_lock_set(file, flock);
1569 if (type & server->vals->shared_lock_type)
1570 posix_lock_type = CIFS_RDLCK;
1572 posix_lock_type = CIFS_WRLCK;
1575 posix_lock_type = CIFS_UNLCK;
1577 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1578 hash_lockowner(flock->fl_owner),
1579 flock->fl_start, length,
1580 NULL, posix_lock_type, wait_flag);
1585 struct cifsLockInfo *lock;
1587 lock = cifs_lock_init(flock->fl_start, length, type);
1591 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1600 * Windows 7 server can delay breaking lease from read to None
1601 * if we set a byte-range lock on a file - break it explicitly
1602 * before sending the lock to the server to be sure the next
1603 * read won't conflict with non-overlapping locks due to page reading.
1606 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1607 CIFS_CACHE_READ(CIFS_I(inode))) {
1608 cifs_zap_mapping(inode);
1609 cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1611 CIFS_I(inode)->oplock = 0;
1614 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1615 type, 1, 0, wait_flag);
1621 cifs_lock_add(cfile, lock);
1623 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1626 if (flock->fl_flags & FL_POSIX && !rc)
1627 rc = locks_lock_file_wait(file, flock);
1631 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1634 int lock = 0, unlock = 0;
1635 bool wait_flag = false;
1636 bool posix_lck = false;
1637 struct cifs_sb_info *cifs_sb;
1638 struct cifs_tcon *tcon;
1639 struct cifsInodeInfo *cinode;
1640 struct cifsFileInfo *cfile;
1647 cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1648 cmd, flock->fl_flags, flock->fl_type,
1649 flock->fl_start, flock->fl_end);
1651 cfile = (struct cifsFileInfo *)file->private_data;
1652 tcon = tlink_tcon(cfile->tlink);
1654 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1657 cifs_sb = CIFS_FILE_SB(file);
1658 netfid = cfile->fid.netfid;
1659 cinode = CIFS_I(file_inode(file));
1661 if (cap_unix(tcon->ses) &&
1662 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1663 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1666 * BB add code here to normalize offset and length to account for
1667 * negative length which we can not accept over the wire.
1669 if (IS_GETLK(cmd)) {
1670 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1675 if (!lock && !unlock) {
1677 * if no lock or unlock then nothing to do since we do not know what it is
1684 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1691 * update the file size (if needed) after a write. Should be called with
1692 * the inode->i_lock held
1695 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1696 unsigned int bytes_written)
1698 loff_t end_of_write = offset + bytes_written;
1700 if (end_of_write > cifsi->server_eof)
1701 cifsi->server_eof = end_of_write;
1705 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1706 size_t write_size, loff_t *offset)
1709 unsigned int bytes_written = 0;
1710 unsigned int total_written;
1711 struct cifs_sb_info *cifs_sb;
1712 struct cifs_tcon *tcon;
1713 struct TCP_Server_Info *server;
1715 struct dentry *dentry = open_file->dentry;
1716 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1717 struct cifs_io_parms io_parms;
1719 cifs_sb = CIFS_SB(dentry->d_sb);
1721 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1722 write_size, *offset, dentry);
1724 tcon = tlink_tcon(open_file->tlink);
1725 server = tcon->ses->server;
1727 if (!server->ops->sync_write)
1732 for (total_written = 0; write_size > total_written;
1733 total_written += bytes_written) {
1735 while (rc == -EAGAIN) {
1739 if (open_file->invalidHandle) {
1740 /* we could deadlock if we called
1741 filemap_fdatawait from here so tell
1742 reopen_file not to flush data to server now */
1744 rc = cifs_reopen_file(open_file, false);
1749 len = min(server->ops->wp_retry_size(d_inode(dentry)),
1750 (unsigned int)write_size - total_written);
1751 /* iov[0] is reserved for smb header */
1752 iov[1].iov_base = (char *)write_data + total_written;
1753 iov[1].iov_len = len;
1755 io_parms.tcon = tcon;
1756 io_parms.offset = *offset;
1757 io_parms.length = len;
1758 rc = server->ops->sync_write(xid, &open_file->fid,
1759 &io_parms, &bytes_written, iov, 1);
1761 if (rc || (bytes_written == 0)) {
1769 spin_lock(&d_inode(dentry)->i_lock);
1770 cifs_update_eof(cifsi, *offset, bytes_written);
1771 spin_unlock(&d_inode(dentry)->i_lock);
1772 *offset += bytes_written;
1776 cifs_stats_bytes_written(tcon, total_written);
1778 if (total_written > 0) {
1779 spin_lock(&d_inode(dentry)->i_lock);
1780 if (*offset > d_inode(dentry)->i_size)
1781 i_size_write(d_inode(dentry), *offset);
1782 spin_unlock(&d_inode(dentry)->i_lock);
1784 mark_inode_dirty_sync(d_inode(dentry));
1786 return total_written;
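/*
 * Writes are issued synchronously in chunks no larger than the server's
 * wp_retry_size; -EAGAIN triggers a reopen of an invalidated handle
 * without flushing (to avoid deadlocking in writepage paths), and both
 * the cached server EOF and i_size are advanced under inode->i_lock as
 * data goes out.
 */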
1789 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1792 struct cifsFileInfo *open_file = NULL;
1793 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1794 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1796 /* only filter by fsuid on multiuser mounts */
1797 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1800 spin_lock(&tcon->open_file_lock);
1801 /* we could simply get the first_list_entry since write-only entries
1802 are always at the end of the list but since the first entry might
1803 have a close pending, we go through the whole list */
1804 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1805 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1807 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1808 if (!open_file->invalidHandle) {
1809 /* found a good file */
1810 /* lock it so it will not be closed on us */
1811 cifsFileInfo_get(open_file);
1812 spin_unlock(&tcon->open_file_lock);
1814 } /* else might as well continue, and look for
1815 another, or simply have the caller reopen it
1816 again rather than trying to fix this handle */
1817 } else /* write only file */
1818 break; /* write only files are last so must be done */
1820 spin_unlock(&tcon->open_file_lock);
1824 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1827 struct cifsFileInfo *open_file, *inv_file = NULL;
1828 struct cifs_sb_info *cifs_sb;
1829 struct cifs_tcon *tcon;
1830 bool any_available = false;
1832 unsigned int refind = 0;
1834 /* Having a null inode here (because mapping->host was set to zero by
1835 the VFS or MM) should not happen but we had reports of an oops (due to
1836 it being zero) during stress testcases so we need to check for it */
1838 if (cifs_inode == NULL) {
1839 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1844 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1845 tcon = cifs_sb_master_tcon(cifs_sb);
1847 /* only filter by fsuid on multiuser mounts */
1848 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1851 spin_lock(&tcon->open_file_lock);
1853 if (refind > MAX_REOPEN_ATT) {
1854 spin_unlock(&tcon->open_file_lock);
1857 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1858 if (!any_available && open_file->pid != current->tgid)
1860 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1862 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1863 if (!open_file->invalidHandle) {
1864 /* found a good writable file */
1865 cifsFileInfo_get(open_file);
1866 spin_unlock(&tcon->open_file_lock);
1870 inv_file = open_file;
1874 /* couldn't find usable FH with same pid, try any available */
1875 if (!any_available) {
1876 any_available = true;
1877 goto refind_writable;
1881 any_available = false;
1882 cifsFileInfo_get(inv_file);
1885 spin_unlock(&tcon->open_file_lock);
1888 rc = cifs_reopen_file(inv_file, false);
1892 spin_lock(&tcon->open_file_lock);
1893 list_move_tail(&inv_file->flist,
1894 &cifs_inode->openFileList);
1895 spin_unlock(&tcon->open_file_lock);
1896 cifsFileInfo_put(inv_file);
1899 spin_lock(&tcon->open_file_lock);
1900 goto refind_writable;
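/*
 * The search first restricts itself to handles opened by the current
 * tgid; if none is usable, any_available widens it to all writable
 * handles. An invalidated handle is kept as a last resort and reopened
 * outside the spinlock; if that reopen fails, the handle is moved to the
 * tail of the inode's list, its reference is dropped, and the search is
 * retried (up to MAX_REOPEN_ATT times).
 */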
1907 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1909 struct address_space *mapping = page->mapping;
1910 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1913 int bytes_written = 0;
1914 struct inode *inode;
1915 struct cifsFileInfo *open_file;
1917 if (!mapping || !mapping->host)
1920 inode = page->mapping->host;
1922 offset += (loff_t)from;
1923 write_data = kmap(page);
1926 if ((to > PAGE_SIZE) || (from > to)) {
1931 /* racing with truncate? */
1932 if (offset > mapping->host->i_size) {
1934 return 0; /* don't care */
1937 /* check to make sure that we are not extending the file */
1938 if (mapping->host->i_size - offset < (loff_t)to)
1939 to = (unsigned)(mapping->host->i_size - offset);
1941 open_file = find_writable_file(CIFS_I(mapping->host), false);
1943 bytes_written = cifs_write(open_file, open_file->pid,
1944 write_data, to - from, &offset);
1945 cifsFileInfo_put(open_file);
1946 /* Does mm or vfs already set times? */
1947 inode->i_atime = inode->i_mtime = current_time(inode);
1948 if ((bytes_written > 0) && (offset))
1950 else if (bytes_written < 0)
1953 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1961 static struct cifs_writedata *
1962 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1963 pgoff_t end, pgoff_t *index,
1964 unsigned int *found_pages)
1966 unsigned int nr_pages;
1967 struct page **pages;
1968 struct cifs_writedata *wdata;
1970 wdata = cifs_writedata_alloc((unsigned int)tofind,
1971 cifs_writev_complete);
1976 * find_get_pages_tag seems to return a max of 256 on each
1977 * iteration, so we must call it several times in order to
1978 * fill the array, or the wsize is effectively limited to 256 * PAGE_SIZE.
1982 pages = wdata->pages;
1984 nr_pages = find_get_pages_tag(mapping, index,
1985 PAGECACHE_TAG_DIRTY, tofind,
1987 *found_pages += nr_pages;
1990 } while (nr_pages && tofind && *index <= end);
1996 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1997 struct address_space *mapping,
1998 struct writeback_control *wbc,
1999 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2001 unsigned int nr_pages = 0, i;
2004 for (i = 0; i < found_pages; i++) {
2005 page = wdata->pages[i];
2007 * At this point we hold neither mapping->tree_lock nor
2008 * lock on the page itself: the page may be truncated or
2009 * invalidated (changing page->mapping to NULL), or even
2010 * swizzled back from swapper_space to tmpfs file mapping
2016 else if (!trylock_page(page))
2019 if (unlikely(page->mapping != mapping)) {
2024 if (!wbc->range_cyclic && page->index > end) {
2030 if (*next && (page->index != *next)) {
2031 /* Not next consecutive page */
2036 if (wbc->sync_mode != WB_SYNC_NONE)
2037 wait_on_page_writeback(page);
2039 if (PageWriteback(page) ||
2040 !clear_page_dirty_for_io(page)) {
2046 * This actually clears the dirty bit in the radix tree.
2047 * See cifs_writepage() for more commentary.
2049 set_page_writeback(page);
2050 if (page_offset(page) >= i_size_read(mapping->host)) {
2053 end_page_writeback(page);
2057 wdata->pages[i] = page;
2058 *next = page->index + 1;
2062 /* reset index to refind any pages skipped */
2064 *index = wdata->pages[0]->index + 1;
2066 /* put any pages we aren't going to use */
2067 for (i = nr_pages; i < found_pages; i++) {
2068 put_page(wdata->pages[i]);
2069 wdata->pages[i] = NULL;
2076 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2077 struct address_space *mapping, struct writeback_control *wbc)
2080 struct TCP_Server_Info *server;
2083 wdata->sync_mode = wbc->sync_mode;
2084 wdata->nr_pages = nr_pages;
2085 wdata->offset = page_offset(wdata->pages[0]);
2086 wdata->pagesz = PAGE_SIZE;
2087 wdata->tailsz = min(i_size_read(mapping->host) -
2088 page_offset(wdata->pages[nr_pages - 1]),
2090 wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2092 if (wdata->cfile != NULL)
2093 cifsFileInfo_put(wdata->cfile);
2094 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2095 if (!wdata->cfile) {
2096 cifs_dbg(VFS, "No writable handles for inode\n");
2099 wdata->pid = wdata->cfile->pid;
2100 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2101 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2104 for (i = 0; i < nr_pages; ++i)
2105 unlock_page(wdata->pages[i]);
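/*
 * The tail page's length is trimmed to the inode size so we never send
 * past EOF, a writable handle is looked up fresh for every call, and the
 * write is issued asynchronously through ops->async_writev(); on failure
 * every page is unlocked so the caller can redirty or error them.
 */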
2110 static int cifs_writepages(struct address_space *mapping,
2111 struct writeback_control *wbc)
2113 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2114 struct TCP_Server_Info *server;
2115 bool done = false, scanned = false, range_whole = false;
2117 struct cifs_writedata *wdata;
2121 * If wsize is smaller than the page cache size, default to writing
2122 * one page at a time via cifs_writepage
2124 if (cifs_sb->wsize < PAGE_SIZE)
2125 return generic_writepages(mapping, wbc);
2127 if (wbc->range_cyclic) {
2128 index = mapping->writeback_index; /* Start from prev offset */
2131 index = wbc->range_start >> PAGE_SHIFT;
2132 end = wbc->range_end >> PAGE_SHIFT;
2133 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2137 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2139 while (!done && index <= end) {
2140 unsigned int i, nr_pages, found_pages, wsize, credits;
2141 pgoff_t next = 0, tofind, saved_index = index;
2143 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2148 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2150 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2154 add_credits_and_wake_if(server, credits, 0);
2158 if (found_pages == 0) {
2159 kref_put(&wdata->refcount, cifs_writedata_release);
2160 add_credits_and_wake_if(server, credits, 0);
2164 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2165 end, &index, &next, &done);
2167 /* nothing to write? */
2168 if (nr_pages == 0) {
2169 kref_put(&wdata->refcount, cifs_writedata_release);
2170 add_credits_and_wake_if(server, credits, 0);
2174 wdata->credits = credits;
2176 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2178 /* send failure -- clean up the mess */
2180 add_credits_and_wake_if(server, wdata->credits, 0);
2181 for (i = 0; i < nr_pages; ++i) {
2183 redirty_page_for_writepage(wbc,
2186 SetPageError(wdata->pages[i]);
2187 end_page_writeback(wdata->pages[i]);
2188 put_page(wdata->pages[i]);
2191 mapping_set_error(mapping, rc);
2193 kref_put(&wdata->refcount, cifs_writedata_release);
2195 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2196 index = saved_index;
2200 wbc->nr_to_write -= nr_pages;
2201 if (wbc->nr_to_write <= 0)
2207 if (!scanned && !done) {
2209 * We hit the last page and there is more work to be done: wrap
2210 * back to the start of the file
2217 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2218 mapping->writeback_index = index;
2224 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2230 /* BB add check for wbc flags */
2232 if (!PageUptodate(page))
2233 cifs_dbg(FYI, "ppw - page not up to date\n");
2236 * Set the "writeback" flag, and clear "dirty" in the radix tree.
2238 * A writepage() implementation always needs to do either this,
2239 * or re-dirty the page with "redirty_page_for_writepage()" in
2240 * the case of a failure.
2242 * Just unlocking the page will cause the radix tree tag-bits
2243 * to fail to update with the state of the page correctly.
2245 set_page_writeback(page);
2247 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2248 if (rc == -EAGAIN) {
2249 if (wbc->sync_mode == WB_SYNC_ALL)
2251 redirty_page_for_writepage(wbc, page);
2252 } else if (rc != 0) {
2254 mapping_set_error(page->mapping, rc);
2256 SetPageUptodate(page);
2258 end_page_writeback(page);
2264 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2266 int rc = cifs_writepage_locked(page, wbc);
2271 static int cifs_write_end(struct file *file, struct address_space *mapping,
2272 loff_t pos, unsigned len, unsigned copied,
2273 struct page *page, void *fsdata)
2276 struct inode *inode = mapping->host;
2277 struct cifsFileInfo *cfile = file->private_data;
2278 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2281 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2284 pid = current->tgid;
2286 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2289 if (PageChecked(page)) {
2291 SetPageUptodate(page);
2292 ClearPageChecked(page);
2293 } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2294 SetPageUptodate(page);
2296 if (!PageUptodate(page)) {
2298 unsigned offset = pos & (PAGE_SIZE - 1);
2302 /* this is probably better than directly calling
2303 partialpage_write since in this function we already know the
2304 file handle, which we might as well leverage */
2305 /* BB check if anything else is missing out of ppw,
2306 such as updating the last write time */
2307 page_data = kmap(page);
2308 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2309 /* if (rc < 0) should we set writebehind rc? */
2316 set_page_dirty(page);
2320 spin_lock(&inode->i_lock);
2321 if (pos > inode->i_size)
2322 i_size_write(inode, pos);
2323 spin_unlock(&inode->i_lock);
2332 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2337 struct cifs_tcon *tcon;
2338 struct TCP_Server_Info *server;
2339 struct cifsFileInfo *smbfile = file->private_data;
2340 struct inode *inode = file_inode(file);
2341 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2343 rc = file_write_and_wait_range(file, start, end);
2350 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2353 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2354 rc = cifs_zap_mapping(inode);
2356 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2357 rc = 0; /* don't care about it in fsync */
2361 tcon = tlink_tcon(smbfile->tlink);
2362 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2363 server = tcon->ses->server;
2364 if (server->ops->flush)
2365 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2371 inode_unlock(inode);
2375 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2379 struct cifs_tcon *tcon;
2380 struct TCP_Server_Info *server;
2381 struct cifsFileInfo *smbfile = file->private_data;
2382 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2383 struct inode *inode = file->f_mapping->host;
2385 rc = file_write_and_wait_range(file, start, end);
2392 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2395 tcon = tlink_tcon(smbfile->tlink);
2396 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2397 server = tcon->ses->server;
2398 if (server->ops->flush)
2399 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2405 inode_unlock(inode);
2410 * As the file closes, flush all cached write data for this inode,
2411 * checking for write-behind errors.
2413 int cifs_flush(struct file *file, fl_owner_t id)
2415 struct inode *inode = file_inode(file);
2418 if (file->f_mode & FMODE_WRITE)
2419 rc = filemap_write_and_wait(inode->i_mapping);
2421 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2427 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2432 for (i = 0; i < num_pages; i++) {
2433 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2436 * save number of pages we have already allocated and
2437 * return with ENOMEM error
2446 for (i = 0; i < num_pages; i++)
2453 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2458 clen = min_t(const size_t, len, wsize);
2459 num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
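/*
 * clen is the request length clamped to wsize; the caller uses this
 * page count to size a single uncached write request.
 */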
2468 cifs_uncached_writedata_release(struct kref *refcount)
2471 struct cifs_writedata *wdata = container_of(refcount,
2472 struct cifs_writedata, refcount);
2474 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2475 for (i = 0; i < wdata->nr_pages; i++)
2476 put_page(wdata->pages[i]);
2477 cifs_writedata_release(refcount);
2480 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2483 cifs_uncached_writev_complete(struct work_struct *work)
2485 struct cifs_writedata *wdata = container_of(work,
2486 struct cifs_writedata, work);
2487 struct inode *inode = d_inode(wdata->cfile->dentry);
2488 struct cifsInodeInfo *cifsi = CIFS_I(inode);
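/*
 * The write has completed on the server: update the cached server EOF
 * and, if it grew past i_size, the in-core inode size as well.
 */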
2490 spin_lock(&inode->i_lock);
2491 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2492 if (cifsi->server_eof > inode->i_size)
2493 i_size_write(inode, cifsi->server_eof);
2494 spin_unlock(&inode->i_lock);
2496 complete(&wdata->done);
2497 collect_uncached_write_data(wdata->ctx);
2498 /* the below call can possibly free the last ref to aio ctx */
2499 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2503 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2504 size_t *len, unsigned long *num_pages)
2506 size_t save_len, copied, bytes, cur_len = *len;
2507 unsigned long i, nr_pages = *num_pages;
2510 for (i = 0; i < nr_pages; i++) {
2511 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2512 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2515 * If we didn't copy as much as we expected, then that
2516 * may mean we trod into an unmapped area. Stop copying
2517 * at that point. On the next pass through the big
2518 * loop, we'll likely end up getting a zero-length
2519 * write and bailing out of it.
2524 cur_len = save_len - cur_len;
2528 * If we have no data to send, then that probably means that
2529 * the copy above failed altogether. That's most likely because
2530 * the address in the iovec was bogus. Return -EFAULT and let
2531 * the caller free anything we allocated and bail out.
2537 * i + 1 now represents the number of pages we actually used in
2538 * the copy phase above.
2545 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2546 struct cifsFileInfo *open_file,
2547 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2548 struct cifs_aio_ctx *ctx)
2552 unsigned long nr_pages, num_pages, i;
2553 struct cifs_writedata *wdata;
2554 struct iov_iter saved_from = *from;
2555 loff_t saved_offset = offset;
2557 struct TCP_Server_Info *server;
2559 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2560 pid = open_file->pid;
2562 pid = current->tgid;
2564 server = tlink_tcon(open_file->tlink)->ses->server;
2567 unsigned int wsize, credits;
2569 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2574 nr_pages = get_numpages(wsize, len, &cur_len);
2575 wdata = cifs_writedata_alloc(nr_pages,
2576 cifs_uncached_writev_complete);
2579 add_credits_and_wake_if(server, credits, 0);
2583 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2586 add_credits_and_wake_if(server, credits, 0);
2590 num_pages = nr_pages;
2591 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2593 for (i = 0; i < nr_pages; i++)
2594 put_page(wdata->pages[i]);
2596 add_credits_and_wake_if(server, credits, 0);
2601 * Bring nr_pages down to the number of pages we actually used,
2602 * and free any pages that we didn't use.
2604 for ( ; nr_pages > num_pages; nr_pages--)
2605 put_page(wdata->pages[nr_pages - 1]);
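/*
 * Describe the request: nr_pages pages of PAGE_SIZE each, except the
 * final page which holds only the tail (tailsz bytes) of cur_len.
 */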
2607 wdata->sync_mode = WB_SYNC_ALL;
2608 wdata->nr_pages = nr_pages;
2609 wdata->offset = (__u64)offset;
2610 wdata->cfile = cifsFileInfo_get(open_file);
2612 wdata->bytes = cur_len;
2613 wdata->pagesz = PAGE_SIZE;
2614 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2615 wdata->credits = credits;
2617 kref_get(&ctx->refcount);
2619 if (!wdata->cfile->invalidHandle ||
2620 !(rc = cifs_reopen_file(wdata->cfile, false)))
2621 rc = server->ops->async_writev(wdata,
2622 cifs_uncached_writedata_release);
2624 add_credits_and_wake_if(server, wdata->credits, 0);
2625 kref_put(&wdata->refcount,
2626 cifs_uncached_writedata_release);
2627 if (rc == -EAGAIN) {
2629 iov_iter_advance(from, offset - saved_offset);
2635 list_add_tail(&wdata->list, wdata_list);
2643 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2645 struct cifs_writedata *wdata, *tmp;
2646 struct cifs_tcon *tcon;
2647 struct cifs_sb_info *cifs_sb;
2648 struct dentry *dentry = ctx->cfile->dentry;
2652 tcon = tlink_tcon(ctx->cfile->tlink);
2653 cifs_sb = CIFS_SB(dentry->d_sb);
2655 mutex_lock(&ctx->aio_mutex);
2657 if (list_empty(&ctx->list)) {
2658 mutex_unlock(&ctx->aio_mutex);
2664 * Wait for and collect replies for any successful sends in order of
2665 * increasing offset. Once an error is hit, then return without waiting
2666 * for any more replies.
2669 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2671 if (!try_wait_for_completion(&wdata->done)) {
2672 mutex_unlock(&ctx->aio_mutex);
2679 ctx->total_len += wdata->bytes;
2681 /* resend call if it's a retryable error */
2682 if (rc == -EAGAIN) {
2683 struct list_head tmp_list;
2684 struct iov_iter tmp_from = ctx->iter;
2686 INIT_LIST_HEAD(&tmp_list);
2687 list_del_init(&wdata->list);
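/*
 * Position a private copy of the original iterator at the failed
 * chunk's offset and resend just that byte range.
 */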
2689 iov_iter_advance(&tmp_from,
2690 wdata->offset - ctx->pos);
2692 rc = cifs_write_from_iter(wdata->offset,
2693 wdata->bytes, &tmp_from,
2694 ctx->cfile, cifs_sb, &tmp_list,
2697 list_splice(&tmp_list, &ctx->list);
2699 kref_put(&wdata->refcount,
2700 cifs_uncached_writedata_release);
2704 list_del_init(&wdata->list);
2705 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2708 for (i = 0; i < ctx->npages; i++)
2709 put_page(ctx->bv[i].bv_page);
2711 cifs_stats_bytes_written(tcon, ctx->total_len);
2712 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2714 ctx->rc = (rc == 0) ? ctx->total_len : rc;
2716 mutex_unlock(&ctx->aio_mutex);
2718 if (ctx->iocb && ctx->iocb->ki_complete)
2719 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2721 complete(&ctx->done);
2724 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2726 struct file *file = iocb->ki_filp;
2727 ssize_t total_written = 0;
2728 struct cifsFileInfo *cfile;
2729 struct cifs_tcon *tcon;
2730 struct cifs_sb_info *cifs_sb;
2731 struct cifs_aio_ctx *ctx;
2732 struct iov_iter saved_from = *from;
2736 * BB - optimize this path when signing is disabled. We can drop the
2737 * extra memory-to-memory copy and use iovec buffers to construct the write request.
2741 rc = generic_write_checks(iocb, from);
2745 cifs_sb = CIFS_FILE_SB(file);
2746 cfile = file->private_data;
2747 tcon = tlink_tcon(cfile->tlink);
2749 if (!tcon->ses->server->ops->async_writev)
2752 ctx = cifs_aio_ctx_alloc();
2756 ctx->cfile = cifsFileInfo_get(cfile);
2758 if (!is_sync_kiocb(iocb))
2761 ctx->pos = iocb->ki_pos;
2763 rc = setup_aio_ctx_iter(ctx, from, WRITE);
2765 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2769 /* grab a lock here because write response handlers can access ctx */
2770 mutex_lock(&ctx->aio_mutex);
2772 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2773 cfile, cifs_sb, &ctx->list, ctx);
2776 * If at least one write was successfully sent, then discard any rc
2777 * value from the later writes. If the other write succeeds, then
2778 * we'll end up returning whatever was written. If it fails, then
2779 * we'll get a new rc value from that.
2781 if (!list_empty(&ctx->list))
2784 mutex_unlock(&ctx->aio_mutex);
2787 kref_put(&ctx->refcount, cifs_aio_ctx_release);
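/*
 * For async (AIO) callers there is nothing to wait for here: return
 * -EIOCBQUEUED and let collect_uncached_write_data() report the final
 * result through ki_complete().
 */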
2791 if (!is_sync_kiocb(iocb)) {
2792 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2793 return -EIOCBQUEUED;
2796 rc = wait_for_completion_killable(&ctx->done);
2798 mutex_lock(&ctx->aio_mutex);
2799 ctx->rc = rc = -EINTR;
2800 total_written = ctx->total_len;
2801 mutex_unlock(&ctx->aio_mutex);
2804 total_written = ctx->total_len;
2807 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2809 if (unlikely(!total_written))
2812 iocb->ki_pos += total_written;
2813 return total_written;
2817 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2819 struct file *file = iocb->ki_filp;
2820 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2821 struct inode *inode = file->f_mapping->host;
2822 struct cifsInodeInfo *cinode = CIFS_I(inode);
2823 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2828 * We need to hold the sem to be sure nobody modifies lock list
2829 * with a brlock that prevents writing.
2831 down_read(&cinode->lock_sem);
2833 rc = generic_write_checks(iocb, from);
2837 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2838 server->vals->exclusive_lock_type, NULL,
2840 rc = __generic_file_write_iter(iocb, from);
2844 up_read(&cinode->lock_sem);
2845 inode_unlock(inode);
2848 rc = generic_write_sync(iocb, rc);
2853 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2855 struct inode *inode = file_inode(iocb->ki_filp);
2856 struct cifsInodeInfo *cinode = CIFS_I(inode);
2857 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2858 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2859 iocb->ki_filp->private_data;
2860 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2863 written = cifs_get_writer(cinode);
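/*
 * We are now registered as a pending writer; cifs_oplock_break() waits
 * for pending writers to drain before downgrading the oplock.
 */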
2867 if (CIFS_CACHE_WRITE(cinode)) {
2868 if (cap_unix(tcon->ses) &&
2869 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2870 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2871 written = generic_file_write_iter(iocb, from);
2874 written = cifs_writev(iocb, from);
2878 * For non-oplocked files in strict cache mode we need to write the data
2879 * to the server exactly from pos to pos+len-1 rather than flush all
2880 * affected pages because it may cause an error with mandatory locks on
2881 * these pages but not on the region from pos to pos+len-1.
2883 written = cifs_user_writev(iocb, from);
2884 if (written > 0 && CIFS_CACHE_READ(cinode)) {
2886 * A Windows 7 server can delay breaking a level2 oplock when a write
2887 * request comes in - break it on the client to prevent reading stale data.
2890 cifs_zap_mapping(inode);
2891 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2896 cifs_put_writer(cinode);
2900 static struct cifs_readdata *
2901 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2903 struct cifs_readdata *rdata;
2905 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2907 if (rdata != NULL) {
2908 kref_init(&rdata->refcount);
2909 INIT_LIST_HEAD(&rdata->list);
2910 init_completion(&rdata->done);
2911 INIT_WORK(&rdata->work, complete);
2918 cifs_readdata_release(struct kref *refcount)
2920 struct cifs_readdata *rdata = container_of(refcount,
2921 struct cifs_readdata, refcount);
2924 cifsFileInfo_put(rdata->cfile);
2930 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2936 for (i = 0; i < nr_pages; i++) {
2937 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2942 rdata->pages[i] = page;
2946 for (i = 0; i < nr_pages; i++) {
2947 put_page(rdata->pages[i]);
2948 rdata->pages[i] = NULL;
2955 cifs_uncached_readdata_release(struct kref *refcount)
2957 struct cifs_readdata *rdata = container_of(refcount,
2958 struct cifs_readdata, refcount);
2961 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
2962 for (i = 0; i < rdata->nr_pages; i++) {
2963 put_page(rdata->pages[i]);
2964 rdata->pages[i] = NULL;
2966 cifs_readdata_release(refcount);
2970 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2971 * @rdata: the readdata response with list of pages holding data
2972 * @iter: destination for our data
2974 * This function copies data from a list of pages in a readdata response into
2975 * an array of iovecs. It will first calculate where the data should go
2976 * based on the info in the readdata and then copy the data into that spot.
2979 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2981 size_t remaining = rdata->got_bytes;
2984 for (i = 0; i < rdata->nr_pages; i++) {
2985 struct page *page = rdata->pages[i];
2986 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
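/*
 * Pipe-backed iterators are handled by mapping the page and copying
 * the bytes; other iterator types can use copy_page_to_iter() directly.
 */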
2989 if (unlikely(iter->type & ITER_PIPE)) {
2990 void *addr = kmap_atomic(page);
2992 written = copy_to_iter(addr, copy, iter);
2993 kunmap_atomic(addr);
2995 written = copy_page_to_iter(page, 0, copy, iter);
2996 remaining -= written;
2997 if (written < copy && iov_iter_count(iter) > 0)
3000 return remaining ? -EFAULT : 0;
3003 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3006 cifs_uncached_readv_complete(struct work_struct *work)
3008 struct cifs_readdata *rdata = container_of(work,
3009 struct cifs_readdata, work);
3011 complete(&rdata->done);
3012 collect_uncached_read_data(rdata->ctx);
3013 /* the below call can possibly free the last ref to aio ctx */
3014 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3018 uncached_fill_pages(struct TCP_Server_Info *server,
3019 struct cifs_readdata *rdata, struct iov_iter *iter,
3024 unsigned int nr_pages = rdata->nr_pages;
3026 rdata->got_bytes = 0;
3027 rdata->tailsz = PAGE_SIZE;
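/*
 * Fill each page in turn, either by copying from the supplied iterator
 * or by reading directly from the socket; a short final page is
 * recorded in tailsz.
 */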
3028 for (i = 0; i < nr_pages; i++) {
3029 struct page *page = rdata->pages[i];
3033 /* no need to hold page hostage */
3034 rdata->pages[i] = NULL;
3040 if (len >= PAGE_SIZE) {
3041 /* enough data to fill the page */
3045 zero_user(page, len, PAGE_SIZE - len);
3046 rdata->tailsz = len;
3050 result = copy_page_from_iter(page, 0, n, iter);
3052 result = cifs_read_page_from_socket(server, page, n);
3056 rdata->got_bytes += result;
3059 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3060 rdata->got_bytes : result;
3064 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3065 struct cifs_readdata *rdata, unsigned int len)
3067 return uncached_fill_pages(server, rdata, NULL, len);
3071 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3072 struct cifs_readdata *rdata,
3073 struct iov_iter *iter)
3075 return uncached_fill_pages(server, rdata, iter, iter->count);
3079 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3080 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3081 struct cifs_aio_ctx *ctx)
3083 struct cifs_readdata *rdata;
3084 unsigned int npages, rsize, credits;
3088 struct TCP_Server_Info *server;
3090 server = tlink_tcon(open_file->tlink)->ses->server;
3092 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3093 pid = open_file->pid;
3095 pid = current->tgid;
3098 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3103 cur_len = min_t(const size_t, len, rsize);
3104 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3106 /* allocate a readdata struct */
3107 rdata = cifs_readdata_alloc(npages,
3108 cifs_uncached_readv_complete);
3110 add_credits_and_wake_if(server, credits, 0);
3115 rc = cifs_read_allocate_pages(rdata, npages);
3119 rdata->cfile = cifsFileInfo_get(open_file);
3120 rdata->nr_pages = npages;
3121 rdata->offset = offset;
3122 rdata->bytes = cur_len;
3124 rdata->pagesz = PAGE_SIZE;
3125 rdata->read_into_pages = cifs_uncached_read_into_pages;
3126 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3127 rdata->credits = credits;
3129 kref_get(&ctx->refcount);
3131 if (!rdata->cfile->invalidHandle ||
3132 !(rc = cifs_reopen_file(rdata->cfile, true)))
3133 rc = server->ops->async_readv(rdata);
3136 add_credits_and_wake_if(server, rdata->credits, 0);
3137 kref_put(&rdata->refcount,
3138 cifs_uncached_readdata_release);
3144 list_add_tail(&rdata->list, rdata_list);
3153 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3155 struct cifs_readdata *rdata, *tmp;
3156 struct iov_iter *to = &ctx->iter;
3157 struct cifs_sb_info *cifs_sb;
3158 struct cifs_tcon *tcon;
3162 tcon = tlink_tcon(ctx->cfile->tlink);
3163 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3165 mutex_lock(&ctx->aio_mutex);
3167 if (list_empty(&ctx->list)) {
3168 mutex_unlock(&ctx->aio_mutex);
3173 /* the loop below should proceed in the order of increasing offsets */
3175 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3177 if (!try_wait_for_completion(&rdata->done)) {
3178 mutex_unlock(&ctx->aio_mutex);
3182 if (rdata->result == -EAGAIN) {
3183 /* resend call if it's a retryable error */
3184 struct list_head tmp_list;
3185 unsigned int got_bytes = rdata->got_bytes;
3187 list_del_init(&rdata->list);
3188 INIT_LIST_HEAD(&tmp_list);
3191 * Got part of the data and then a reconnect
3192 * happened -- fill the buffer and continue the requests.
3195 if (got_bytes && got_bytes < rdata->bytes) {
3196 rc = cifs_readdata_to_iov(rdata, to);
3198 kref_put(&rdata->refcount,
3199 cifs_uncached_readdata_release);
3204 rc = cifs_send_async_read(
3205 rdata->offset + got_bytes,
3206 rdata->bytes - got_bytes,
3207 rdata->cfile, cifs_sb,
3210 list_splice(&tmp_list, &ctx->list);
3212 kref_put(&rdata->refcount,
3213 cifs_uncached_readdata_release);
3215 } else if (rdata->result)
3218 rc = cifs_readdata_to_iov(rdata, to);
3220 /* if there was a short read -- discard anything left */
3221 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3224 list_del_init(&rdata->list);
3225 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
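/*
 * Release the pages backing ctx->bv, marking them dirty first when
 * data was copied into them (should_dirty).
 */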
3228 for (i = 0; i < ctx->npages; i++) {
3229 if (ctx->should_dirty)
3230 set_page_dirty(ctx->bv[i].bv_page);
3231 put_page(ctx->bv[i].bv_page);
3234 ctx->total_len = ctx->len - iov_iter_count(to);
3236 cifs_stats_bytes_read(tcon, ctx->total_len);
3238 /* mask nodata case */
3242 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3244 mutex_unlock(&ctx->aio_mutex);
3246 if (ctx->iocb && ctx->iocb->ki_complete)
3247 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3249 complete(&ctx->done);
3252 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3254 struct file *file = iocb->ki_filp;
3257 ssize_t total_read = 0;
3258 loff_t offset = iocb->ki_pos;
3259 struct cifs_sb_info *cifs_sb;
3260 struct cifs_tcon *tcon;
3261 struct cifsFileInfo *cfile;
3262 struct cifs_aio_ctx *ctx;
3264 len = iov_iter_count(to);
3268 cifs_sb = CIFS_FILE_SB(file);
3269 cfile = file->private_data;
3270 tcon = tlink_tcon(cfile->tlink);
3272 if (!tcon->ses->server->ops->async_readv)
3275 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3276 cifs_dbg(FYI, "attempting read on write only file instance\n");
3278 ctx = cifs_aio_ctx_alloc();
3282 ctx->cfile = cifsFileInfo_get(cfile);
3284 if (!is_sync_kiocb(iocb))
3287 if (to->type == ITER_IOVEC)
3288 ctx->should_dirty = true;
3290 rc = setup_aio_ctx_iter(ctx, to, READ);
3292 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3299 /* grab a lock here because read response handlers can access ctx */
3299 mutex_lock(&ctx->aio_mutex);
3301 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3303 /* if at least one read request was sent successfully, reset rc */
3304 if (!list_empty(&ctx->list))
3307 mutex_unlock(&ctx->aio_mutex);
3310 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3314 if (!is_sync_kiocb(iocb)) {
3315 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3316 return -EIOCBQUEUED;
3319 rc = wait_for_completion_killable(&ctx->done);
3321 mutex_lock(&ctx->aio_mutex);
3322 ctx->rc = rc = -EINTR;
3323 total_read = ctx->total_len;
3324 mutex_unlock(&ctx->aio_mutex);
3327 total_read = ctx->total_len;
3330 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3333 iocb->ki_pos += total_read;
3340 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3342 struct inode *inode = file_inode(iocb->ki_filp);
3343 struct cifsInodeInfo *cinode = CIFS_I(inode);
3344 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3345 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3346 iocb->ki_filp->private_data;
3347 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3351 * In strict cache mode we need to read from the server every time
3352 * if we don't have a level II oplock because the server can delay the
3353 * mtime change - so we can't decide whether to invalidate the inode.
3354 * And page reading can also fail if there are mandatory locks
3355 * on pages affected by this read but not on the region from pos to pos+len-1.
3358 if (!CIFS_CACHE_READ(cinode))
3359 return cifs_user_readv(iocb, to);
3361 if (cap_unix(tcon->ses) &&
3362 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3363 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3364 return generic_file_read_iter(iocb, to);
3367 * We need to hold the sem to be sure nobody modifies lock list
3368 * with a brlock that prevents reading.
3370 down_read(&cinode->lock_sem);
3371 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3372 tcon->ses->server->vals->shared_lock_type,
3373 NULL, CIFS_READ_OP))
3374 rc = generic_file_read_iter(iocb, to);
3375 up_read(&cinode->lock_sem);
3380 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3383 unsigned int bytes_read = 0;
3384 unsigned int total_read;
3385 unsigned int current_read_size;
3387 struct cifs_sb_info *cifs_sb;
3388 struct cifs_tcon *tcon;
3389 struct TCP_Server_Info *server;
3392 struct cifsFileInfo *open_file;
3393 struct cifs_io_parms io_parms;
3394 int buf_type = CIFS_NO_BUFFER;
3398 cifs_sb = CIFS_FILE_SB(file);
3400 /* FIXME: set up handlers for larger reads and/or convert to async */
3401 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3403 if (file->private_data == NULL) {
3408 open_file = file->private_data;
3409 tcon = tlink_tcon(open_file->tlink);
3410 server = tcon->ses->server;
3412 if (!server->ops->sync_read) {
3417 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3418 pid = open_file->pid;
3420 pid = current->tgid;
3422 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3423 cifs_dbg(FYI, "attempting read on write only file instance\n");
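/*
 * Legacy synchronous read path: issue reads of at most rsize bytes at
 * a time, retrying on -EAGAIN, until read_size bytes have been read or
 * the server returns an error or zero bytes.
 */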
3425 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3426 total_read += bytes_read, cur_offset += bytes_read) {
3428 current_read_size = min_t(uint, read_size - total_read,
3431 * For Windows ME and 9x we do not want to request more
3432 * than it negotiated since it will then refuse the read.
3435 if ((tcon->ses) && !(tcon->ses->capabilities &
3436 tcon->ses->server->vals->cap_large_files)) {
3437 current_read_size = min_t(uint,
3438 current_read_size, CIFSMaxBufSize);
3440 if (open_file->invalidHandle) {
3441 rc = cifs_reopen_file(open_file, true);
3446 io_parms.tcon = tcon;
3447 io_parms.offset = *offset;
3448 io_parms.length = current_read_size;
3449 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3450 &bytes_read, &cur_offset,
3452 } while (rc == -EAGAIN);
3454 if (rc || (bytes_read == 0)) {
3462 cifs_stats_bytes_read(tcon, total_read);
3463 *offset += bytes_read;
3471 * If the page is mmap'ed into a process' page tables, then we need to make
3472 * sure that it doesn't change while being written back.
3475 cifs_page_mkwrite(struct vm_fault *vmf)
3477 struct page *page = vmf->page;
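/*
 * Returning VM_FAULT_LOCKED tells the fault handler that the page is
 * returned locked, keeping it stable while writeback is in progress.
 */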
3480 return VM_FAULT_LOCKED;
3483 static const struct vm_operations_struct cifs_file_vm_ops = {
3484 .fault = filemap_fault,
3485 .map_pages = filemap_map_pages,
3486 .page_mkwrite = cifs_page_mkwrite,
3489 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3492 struct inode *inode = file_inode(file);
3496 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3497 rc = cifs_zap_mapping(inode);
3502 rc = generic_file_mmap(file, vma);
3504 vma->vm_ops = &cifs_file_vm_ops;
3509 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3514 rc = cifs_revalidate_file(file);
3516 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3521 rc = generic_file_mmap(file, vma);
3523 vma->vm_ops = &cifs_file_vm_ops;
3529 cifs_readv_complete(struct work_struct *work)
3531 unsigned int i, got_bytes;
3532 struct cifs_readdata *rdata = container_of(work,
3533 struct cifs_readdata, work);
3535 got_bytes = rdata->got_bytes;
3536 for (i = 0; i < rdata->nr_pages; i++) {
3537 struct page *page = rdata->pages[i];
3539 lru_cache_add_file(page);
3541 if (rdata->result == 0 ||
3542 (rdata->result == -EAGAIN && got_bytes)) {
3543 flush_dcache_page(page);
3544 SetPageUptodate(page);
3549 if (rdata->result == 0 ||
3550 (rdata->result == -EAGAIN && got_bytes))
3551 cifs_readpage_to_fscache(rdata->mapping->host, page);
3553 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3556 rdata->pages[i] = NULL;
3558 kref_put(&rdata->refcount, cifs_readdata_release);
3562 readpages_fill_pages(struct TCP_Server_Info *server,
3563 struct cifs_readdata *rdata, struct iov_iter *iter,
3570 unsigned int nr_pages = rdata->nr_pages;
3572 /* determine the eof that the server (probably) has */
3573 eof = CIFS_I(rdata->mapping->host)->server_eof;
3574 eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3575 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3577 rdata->got_bytes = 0;
3578 rdata->tailsz = PAGE_SIZE;
3579 for (i = 0; i < nr_pages; i++) {
3580 struct page *page = rdata->pages[i];
3581 size_t n = PAGE_SIZE;
3583 if (len >= PAGE_SIZE) {
3585 } else if (len > 0) {
3586 /* enough for partial page, fill and zero the rest */
3587 zero_user(page, len, PAGE_SIZE - len);
3588 n = rdata->tailsz = len;
3590 } else if (page->index > eof_index) {
3592 * The VFS will not try to do readahead past the
3593 * i_size, but it's possible that we have outstanding
3594 * writes with gaps in the middle and the i_size hasn't
3595 * caught up yet. Populate those with zeroed out pages
3596 * to prevent the VFS from repeatedly attempting to
3597 * fill them until the writes are flushed.
3599 zero_user(page, 0, PAGE_SIZE);
3600 lru_cache_add_file(page);
3601 flush_dcache_page(page);
3602 SetPageUptodate(page);
3605 rdata->pages[i] = NULL;
3609 /* no need to hold page hostage */
3610 lru_cache_add_file(page);
3613 rdata->pages[i] = NULL;
3619 result = copy_page_from_iter(page, 0, n, iter);
3621 result = cifs_read_page_from_socket(server, page, n);
3625 rdata->got_bytes += result;
3628 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3629 rdata->got_bytes : result;
3633 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3634 struct cifs_readdata *rdata, unsigned int len)
3636 return readpages_fill_pages(server, rdata, NULL, len);
3640 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
3641 struct cifs_readdata *rdata,
3642 struct iov_iter *iter)
3644 return readpages_fill_pages(server, rdata, iter, iter->count);
3648 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3649 unsigned int rsize, struct list_head *tmplist,
3650 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3652 struct page *page, *tpage;
3653 unsigned int expected_index;
3655 gfp_t gfp = readahead_gfp_mask(mapping);
3657 INIT_LIST_HEAD(tmplist);
3659 page = list_entry(page_list->prev, struct page, lru);
3662 * Lock the page and put it in the cache. Since no one else
3663 * should have access to this page, we're safe to simply set
3664 * PG_locked without checking it first.
3666 __SetPageLocked(page);
3667 rc = add_to_page_cache_locked(page, mapping,
3670 /* give up if we can't stick it in the cache */
3672 __ClearPageLocked(page);
3676 /* move first page to the tmplist */
3677 *offset = (loff_t)page->index << PAGE_SHIFT;
3680 list_move_tail(&page->lru, tmplist);
3682 /* now try and add more pages onto the request */
3683 expected_index = page->index + 1;
3684 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3685 /* discontinuity ? */
3686 if (page->index != expected_index)
3689 /* would this page push the read over the rsize? */
3690 if (*bytes + PAGE_SIZE > rsize)
3693 __SetPageLocked(page);
3694 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3695 __ClearPageLocked(page);
3698 list_move_tail(&page->lru, tmplist);
3699 (*bytes) += PAGE_SIZE;
3706 static int cifs_readpages(struct file *file, struct address_space *mapping,
3707 struct list_head *page_list, unsigned num_pages)
3710 struct list_head tmplist;
3711 struct cifsFileInfo *open_file = file->private_data;
3712 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3713 struct TCP_Server_Info *server;
3717 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3718 * immediately if the cookie is negative
3720 * After this point, every page in the list might have PG_fscache set,
3721 * so we will need to clean that up off of every page we don't use.
3723 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3728 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3729 pid = open_file->pid;
3731 pid = current->tgid;
3734 server = tlink_tcon(open_file->tlink)->ses->server;
3736 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3737 __func__, file, mapping, num_pages);
3740 * Start with the page at end of list and move it to private
3741 * list. Do the same with any following pages until we hit
3742 * the rsize limit, hit an index discontinuity, or run out of
3743 * pages. Issue the async read and then start the loop again
3744 * until the list is empty.
3746 * Note that list order is important. The page_list is in
3747 * the order of declining indexes. When we put the pages in
3748 * the rdata->pages, then we want them in increasing order.
3750 while (!list_empty(page_list)) {
3751 unsigned int i, nr_pages, bytes, rsize;
3753 struct page *page, *tpage;
3754 struct cifs_readdata *rdata;
3757 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3763 * Give up immediately if rsize is too small to read an entire
3764 * page. The VFS will fall back to readpage. We should never
3765 * reach this point however since we set ra_pages to 0 when the
3766 * rsize is smaller than a cache page.
3768 if (unlikely(rsize < PAGE_SIZE)) {
3769 add_credits_and_wake_if(server, credits, 0);
3773 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3774 &nr_pages, &offset, &bytes);
3776 add_credits_and_wake_if(server, credits, 0);
3780 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3782 /* best to give up if we're out of mem */
3783 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3784 list_del(&page->lru);
3785 lru_cache_add_file(page);
3790 add_credits_and_wake_if(server, credits, 0);
3794 rdata->cfile = cifsFileInfo_get(open_file);
3795 rdata->mapping = mapping;
3796 rdata->offset = offset;
3797 rdata->bytes = bytes;
3799 rdata->pagesz = PAGE_SIZE;
3800 rdata->read_into_pages = cifs_readpages_read_into_pages;
3801 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
3802 rdata->credits = credits;
3804 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3805 list_del(&page->lru);
3806 rdata->pages[rdata->nr_pages++] = page;
3809 if (!rdata->cfile->invalidHandle ||
3810 !(rc = cifs_reopen_file(rdata->cfile, true)))
3811 rc = server->ops->async_readv(rdata);
3813 add_credits_and_wake_if(server, rdata->credits, 0);
3814 for (i = 0; i < rdata->nr_pages; i++) {
3815 page = rdata->pages[i];
3816 lru_cache_add_file(page);
3820 /* Fallback to the readpage in error/reconnect cases */
3821 kref_put(&rdata->refcount, cifs_readdata_release);
3825 kref_put(&rdata->refcount, cifs_readdata_release);
3828 /* Any pages that have been shown to fscache but didn't get added to
3829 * the pagecache must be uncached before they get returned to the system.
3832 cifs_fscache_readpages_cancel(mapping->host, page_list);
3837 * cifs_readpage_worker must be called with the page pinned
3839 static int cifs_readpage_worker(struct file *file, struct page *page,
3845 /* Is the page cached? */
3846 rc = cifs_readpage_from_fscache(file_inode(file), page);
3850 read_data = kmap(page);
3851 /* for reads over a certain size we could initiate async read ahead */
3853 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3858 cifs_dbg(FYI, "Bytes read %d\n", rc);
3860 file_inode(file)->i_atime =
3861 current_time(file_inode(file));
3864 memset(read_data + rc, 0, PAGE_SIZE - rc);
3866 flush_dcache_page(page);
3867 SetPageUptodate(page);
3869 /* send this page to the cache */
3870 cifs_readpage_to_fscache(file_inode(file), page);
3882 static int cifs_readpage(struct file *file, struct page *page)
3884 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3890 if (file->private_data == NULL) {
3896 cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3897 page, (int)offset, (int)offset);
3899 rc = cifs_readpage_worker(file, page, &offset);
3905 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3907 struct cifsFileInfo *open_file;
3908 struct cifs_tcon *tcon =
3909 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3911 spin_lock(&tcon->open_file_lock);
3912 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3913 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3914 spin_unlock(&tcon->open_file_lock);
3918 spin_unlock(&tcon->open_file_lock);
3922 /* We do not want to update the file size from the server for inodes
3923 open for write - to avoid races with writepage extending
3924 the file. In the future we could consider allowing the inode
3925 to be refreshed only on increases in the file size,
3926 but this is tricky to do without racing with writebehind
3927 page caching in the current Linux kernel design */
3928 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3933 if (is_inode_writable(cifsInode)) {
3934 /* This inode is open for write at least once */
3935 struct cifs_sb_info *cifs_sb;
3937 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3938 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3939 /* since there is no page cache to corrupt on directio,
3940 we can change the size safely */
3944 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3952 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3953 loff_t pos, unsigned len, unsigned flags,
3954 struct page **pagep, void **fsdata)
3957 pgoff_t index = pos >> PAGE_SHIFT;
3958 loff_t offset = pos & (PAGE_SIZE - 1);
3959 loff_t page_start = pos & PAGE_MASK;
3964 cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3967 page = grab_cache_page_write_begin(mapping, index, flags);
3973 if (PageUptodate(page))
3977 * If we write a full page it will be up to date, no need to read from
3978 * the server. If the write is short, we'll end up doing a sync write instead.
3981 if (len == PAGE_SIZE)
3985 * optimize away the read when we have an oplock, and we're not
3986 * expecting to use any of the data we'd be reading in. That
3987 * is, when the page lies beyond the EOF, or straddles the EOF
3988 * and the write will cover all of the existing data.
3990 if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3991 i_size = i_size_read(mapping->host);
3992 if (page_start >= i_size ||
3993 (offset == 0 && (pos + len) >= i_size)) {
3994 zero_user_segments(page, 0, offset,
3998 * PageChecked means that the parts of the page
3999 * to which we're not writing are considered up
4000 * to date. Once the data is copied to the
4001 * page, it can be set uptodate.
4003 SetPageChecked(page);
4008 if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4010 * might as well read a page, it is fast enough. If we get
4011 * an error, we don't need to return it. cifs_write_end will
4012 * do a sync write instead since PG_uptodate isn't set.
4014 cifs_readpage_worker(file, page, &page_start);
4019 /* we could try using another file handle if there is one -
4020 but how would we lock it to prevent close of that handle
4021 racing with this read? In any case
4022 this will be written out by write_end so is fine */
4029 static int cifs_release_page(struct page *page, gfp_t gfp)
4031 if (PagePrivate(page))
4034 return cifs_fscache_release_page(page, gfp);
4037 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4038 unsigned int length)
4040 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4042 if (offset == 0 && length == PAGE_SIZE)
4043 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4046 static int cifs_launder_page(struct page *page)
4049 loff_t range_start = page_offset(page);
4050 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4051 struct writeback_control wbc = {
4052 .sync_mode = WB_SYNC_ALL,
4054 .range_start = range_start,
4055 .range_end = range_end,
4058 cifs_dbg(FYI, "Launder page: %p\n", page);
4060 if (clear_page_dirty_for_io(page))
4061 rc = cifs_writepage_locked(page, &wbc);
4063 cifs_fscache_invalidate_page(page, page->mapping->host);
4067 void cifs_oplock_break(struct work_struct *work)
4069 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4071 struct inode *inode = d_inode(cfile->dentry);
4072 struct cifsInodeInfo *cinode = CIFS_I(inode);
4073 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4074 struct TCP_Server_Info *server = tcon->ses->server;
4077 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4078 TASK_UNINTERRUPTIBLE);
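/*
 * Any in-flight writers have now drained, so it is safe to downgrade
 * the oplock/cache state for this inode.
 */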
4080 server->ops->downgrade_oplock(server, cinode,
4081 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
4083 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4084 cifs_has_mand_locks(cinode)) {
4085 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4090 if (inode && S_ISREG(inode->i_mode)) {
4091 if (CIFS_CACHE_READ(cinode))
4092 break_lease(inode, O_RDONLY);
4094 break_lease(inode, O_WRONLY);
4095 rc = filemap_fdatawrite(inode->i_mapping);
4096 if (!CIFS_CACHE_READ(cinode)) {
4097 rc = filemap_fdatawait(inode->i_mapping);
4098 mapping_set_error(inode->i_mapping, rc);
4099 cifs_zap_mapping(inode);
4101 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4104 rc = cifs_push_locks(cfile);
4106 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4109 * Releasing a stale oplock after a recent reconnect of the smb session,
4110 * using a now-incorrect file handle, is not a data integrity issue, but do
4111 * not bother sending an oplock release if the session to the server is still
4112 * disconnected since the oplock has already been released by the server.
4114 if (!cfile->oplock_break_cancelled) {
4115 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4117 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4119 cifs_done_oplock_break(cinode);
4123 * The presence of cifs_direct_io() in the address space ops vector
4124 * allows open() O_DIRECT flags which would have failed otherwise.
4126 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4127 * so this method should never be called.
4129 * Direct IO is not yet supported in the cached mode.
4132 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4136 * Eventually need to support direct IO for non-forcedirectio mounts
4142 const struct address_space_operations cifs_addr_ops = {
4143 .readpage = cifs_readpage,
4144 .readpages = cifs_readpages,
4145 .writepage = cifs_writepage,
4146 .writepages = cifs_writepages,
4147 .write_begin = cifs_write_begin,
4148 .write_end = cifs_write_end,
4149 .set_page_dirty = __set_page_dirty_nobuffers,
4150 .releasepage = cifs_release_page,
4151 .direct_IO = cifs_direct_io,
4152 .invalidatepage = cifs_invalidate_page,
4153 .launder_page = cifs_launder_page,
4157 * cifs_readpages requires the server to support a buffer large enough to
4158 * contain the header plus one complete page of data. Otherwise, we need
4159 * to leave cifs_readpages out of the address space operations.
4161 const struct address_space_operations cifs_addr_ops_smallbuf = {
4162 .readpage = cifs_readpage,
4163 .writepage = cifs_writepage,
4164 .writepages = cifs_writepages,
4165 .write_begin = cifs_write_begin,
4166 .write_end = cifs_write_end,
4167 .set_page_dirty = __set_page_dirty_nobuffers,
4168 .releasepage = cifs_release_page,
4169 .invalidatepage = cifs_invalidate_page,
4170 .launder_page = cifs_launder_page,