4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <asm/div64.h>
39 #include "cifsproto.h"
40 #include "cifs_unicode.h"
41 #include "cifs_debug.h"
42 #include "cifs_fs_sb.h"
/*
 * Map the O_ACCMODE bits of the open flags to a CIFS desired-access mask.
 * O_RDWR maps to GENERIC_READ | GENERIC_WRITE rather than GENERIC_ALL (see
 * the comment in the body). The values returned for O_RDONLY and O_WRONLY
 * are not visible in this listing - TODO confirm they are GENERIC_READ and
 * GENERIC_WRITE. The trailing return is the fallback for any other access
 * mode and requests an explicit set of individual rights instead.
 */
45 static inline int cifs_convert_flags(unsigned int flags)
47 if ((flags & O_ACCMODE) == O_RDONLY)
49 else if ((flags & O_ACCMODE) == O_WRONLY)
51 else if ((flags & O_ACCMODE) == O_RDWR) {
52 /* GENERIC_ALL is too much permission to request
53 can cause unnecessary access denied on create */
54 /* return GENERIC_ALL; */
55 return (GENERIC_READ | GENERIC_WRITE);
58 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
59 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Build the flag word used for a POSIX open from the VFS open flags.
 * The access mode becomes FMODE_READ and/or FMODE_WRITE; selected O_* bits
 * (O_APPEND, O_DSYNC, __O_SYNC, O_DIRECTORY, O_NOFOLLOW, O_DIRECT) are then
 * OR-ed in after a cast to fmode_t. O_CREAT, O_EXCL and O_TRUNC are
 * deliberately not mapped (see the comment in the body).
 * NOTE(review): the guards for the O_APPEND, O_DSYNC, __O_SYNC and O_DIRECT
 * lines are not visible here - presumably each one tests the same bit in
 * flags, like the O_DIRECTORY and O_NOFOLLOW cases; confirm.
 * NOTE(review): FMODE_* and O_* bits are mixed in a single fmode_t value;
 * verify that the consumer expects exactly this encoding.
 */
63 static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
65 fmode_t posix_flags = 0;
67 if ((flags & O_ACCMODE) == O_RDONLY)
68 posix_flags = FMODE_READ;
69 else if ((flags & O_ACCMODE) == O_WRONLY)
70 posix_flags = FMODE_WRITE;
71 else if ((flags & O_ACCMODE) == O_RDWR) {
72 /* GENERIC_ALL is too much permission to request
73 can cause unnecessary access denied on create */
74 /* return GENERIC_ALL; */
75 posix_flags = FMODE_READ | FMODE_WRITE;
77 /* can not map O_CREAT or O_EXCL or O_TRUNC flags when
78 reopening a file. They had their effect on the original open */
80 posix_flags |= (fmode_t)O_APPEND;
82 posix_flags |= (fmode_t)O_DSYNC;
84 posix_flags |= (fmode_t)__O_SYNC;
85 if (flags & O_DIRECTORY)
86 posix_flags |= (fmode_t)O_DIRECTORY;
87 if (flags & O_NOFOLLOW)
88 posix_flags |= (fmode_t)O_NOFOLLOW;
90 posix_flags |= (fmode_t)O_DIRECT;
/*
 * Choose the CIFS create disposition for a set of open flags.
 * The tests run from most to least specific: O_CREAT | O_EXCL first, then
 * O_CREAT | O_TRUNC (FILE_OVERWRITE_IF), then O_CREAT alone, then O_TRUNC
 * (FILE_OVERWRITE). The results of the remaining branches are not visible
 * in this listing; the open flag mapping table in cifs_open lists
 * FILE_CREATE for O_CREAT | O_EXCL, FILE_OPEN_IF for O_CREAT and FILE_OPEN
 * when none of the flags is set - confirm the code matches that table.
 */
95 static inline int cifs_get_disposition(unsigned int flags)
97 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
99 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
100 return FILE_OVERWRITE_IF;
101 else if ((flags & O_CREAT) == O_CREAT)
103 else if ((flags & O_TRUNC) == O_TRUNC)
104 return FILE_OVERWRITE;
/*
 * Update the cached oplock state of an inode after a successful POSIX open.
 * Takes GlobalSMBSeslock for writing, looks the cifs inode up again from the
 * file dentry (overwriting the pCifsInode argument) and releases the lock
 * early if that lookup yields NULL. If the inode can already cache reads
 * the cache is left alone. The section that would flush and invalidate
 * cached pages when mtime or size changed appears to be commented out (see
 * the FIXME), so the function goes on to record the granted oplock:
 * OPLOCK_EXCLUSIVE sets both clientCanCacheAll and clientCanCacheRead,
 * OPLOCK_READ sets only clientCanCacheRead. Only the low four bits of
 * oplock are examined. The return value is not visible in this listing.
 */
109 /* all arguments to this function must be checked for validity in caller */
111 cifs_posix_open_inode_helper(struct inode *inode, struct file *file,
112 struct cifsInodeInfo *pCifsInode, __u32 oplock,
116 write_lock(&GlobalSMBSeslock);
118 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
119 if (pCifsInode == NULL) {
120 write_unlock(&GlobalSMBSeslock);
124 if (pCifsInode->clientCanCacheRead) {
125 /* we have the inode open somewhere else
126 no need to discard cache data */
127 goto psx_client_can_cache;
130 /* BB FIXME need to fix this check to move it earlier into posix_open
131 BB fIX following section BB FIXME */
133 /* if not oplocked, invalidate inode pages if mtime or file
135 /* temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
136 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
137 (file->f_path.dentry->d_inode->i_size ==
138 (loff_t)le64_to_cpu(buf->EndOfFile))) {
139 cFYI(1, "inode unchanged on server");
141 if (file->f_path.dentry->d_inode->i_mapping) {
142 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
144 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
146 cFYI(1, "invalidating remote inode since open detected it "
148 invalidate_remote_inode(file->f_path.dentry->d_inode);
151 psx_client_can_cache:
152 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
153 pCifsInode->clientCanCacheAll = true;
154 pCifsInode->clientCanCacheRead = true;
155 cFYI(1, "Exclusive Oplock granted on inode %p",
156 file->f_path.dentry->d_inode);
157 } else if ((oplock & 0xF) == OPLOCK_READ)
158 pCifsInode->clientCanCacheRead = true;
160 /* will have to change the unlock if we reenable the
161 filemap_fdatawrite (which does not seem necessary */
162 write_unlock(&GlobalSMBSeslock);
/*
 * Refresh inode state after a successful non-POSIX open.
 * If the inode is already read-cacheable the page cache is kept. Otherwise
 * the server LastWriteTime and EndOfFile in buf are compared with the local
 * mtime (and presumably i_size - that line is not visible here): when they
 * match the inode is logged as unchanged, and when they differ dirty pages
 * are written back with filemap_write_and_wait, the result may be stored in
 * write_behind_rc, and the cached pages are invalidated. Inode info is then
 * fetched with cifs_get_inode_info_unix or cifs_get_inode_info; the
 * condition choosing between them is not visible - presumably
 * pTcon->unix_ext, confirm. Finally the granted oplock (low four bits) is
 * recorded in clientCanCacheAll / clientCanCacheRead.
 */
166 /* all arguments to this function must be checked for validity in caller */
167 static inline int cifs_open_inode_helper(struct inode *inode,
168 struct cifsTconInfo *pTcon, __u32 oplock, FILE_ALL_INFO *buf,
169 char *full_path, int xid)
171 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
172 struct timespec temp;
175 if (pCifsInode->clientCanCacheRead) {
176 /* we have the inode open somewhere else
177 no need to discard cache data */
178 goto client_can_cache;
181 /* BB need same check in cifs_create too? */
182 /* if not oplocked, invalidate inode pages if mtime or file
184 temp = cifs_NTtimeToUnix(buf->LastWriteTime);
185 if (timespec_equal(&inode->i_mtime, &temp) &&
187 (loff_t)le64_to_cpu(buf->EndOfFile))) {
188 cFYI(1, "inode unchanged on server");
190 if (inode->i_mapping) {
191 /* BB no need to lock inode until after invalidate
192 since namei code should already have it locked? */
193 rc = filemap_write_and_wait(inode->i_mapping);
195 pCifsInode->write_behind_rc = rc;
197 cFYI(1, "invalidating remote inode since open detected it "
199 invalidate_remote_inode(inode);
204 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
207 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
210 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
211 pCifsInode->clientCanCacheAll = true;
212 pCifsInode->clientCanCacheRead = true;
213 cFYI(1, "Exclusive Oplock granted on inode %p", inode);
214 } else if ((oplock & 0xF) == OPLOCK_READ)
215 pCifsInode->clientCanCacheRead = true;
/*
 * VFS open entry point for regular files.
 * Builds the path from the dentry. When POSIX open has not been marked
 * broken on this tcon and the server advertises the Unix extensions with
 * the POSIX path operations capability, cifs_posix_open is tried first.
 * On that path the POSIX inode helper is run, a cifsFileInfo is created for
 * the new netfid and the fscache cookie is set; CIFSSMBClose is called on
 * the failure branches visible here. An -EINVAL or -EOPNOTSUPP result marks
 * POSIX open as broken for the tcon. Otherwise the open is done the
 * traditional way: the desired access and disposition are derived from
 * f_flags, a FILE_ALL_INFO buffer is allocated, and CIFSSMBOpen is used
 * when the session has CAP_NT_SMBS, with SMBLegacyOpen for old servers.
 * After cifs_open_inode_helper and cifs_new_fileinfo, if the open created
 * the file (CIFS_CREATE_ACTION set in oplock) and Unix extensions are in
 * use, the mode is set afterwards with CIFSSMBUnixSetPathInfo.
 * NOTE(review): the error returns and the cleanup of full_path and buf are
 * not visible in this listing - verify that every exit path frees both.
 */
220 int cifs_open(struct inode *inode, struct file *file)
225 struct cifs_sb_info *cifs_sb;
226 struct cifsTconInfo *tcon;
227 struct cifsFileInfo *pCifsFile = NULL;
228 struct cifsInodeInfo *pCifsInode;
229 char *full_path = NULL;
233 FILE_ALL_INFO *buf = NULL;
237 cifs_sb = CIFS_SB(inode->i_sb);
238 tcon = cifs_sb->tcon;
240 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
242 full_path = build_path_from_dentry(file->f_path.dentry);
243 if (full_path == NULL) {
248 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
249 inode, file->f_flags, full_path);
256 if (!tcon->broken_posix_open && tcon->unix_ext &&
257 (tcon->ses->capabilities & CAP_UNIX) &&
258 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
259 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
260 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
261 oflags |= SMB_O_CREAT;
262 /* can not refresh inode info since size could be stale */
263 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
264 cifs_sb->mnt_file_mode /* ignored */,
265 oflags, &oplock, &netfid, xid);
267 cFYI(1, "posix open succeeded");
268 /* no need for special case handling of setting mode
269 on read only files needed here */
271 rc = cifs_posix_open_inode_helper(inode, file,
272 pCifsInode, oplock, netfid);
274 CIFSSMBClose(xid, tcon, netfid);
278 pCifsFile = cifs_new_fileinfo(inode, netfid, file,
281 if (pCifsFile == NULL) {
282 CIFSSMBClose(xid, tcon, netfid);
286 cifs_fscache_set_inode_cookie(inode, file);
289 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
290 if (tcon->ses->serverNOS)
291 cERROR(1, "server %s of type %s returned"
292 " unexpected error on SMB posix open"
293 ", disabling posix open support."
294 " Check if server update available.",
295 tcon->ses->serverName,
296 tcon->ses->serverNOS);
297 tcon->broken_posix_open = true;
298 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
299 (rc != -EOPNOTSUPP)) /* path not found or net err */
301 /* else fallthrough to retry open the old way on network i/o
305 desiredAccess = cifs_convert_flags(file->f_flags);
307 /*********************************************************************
308 * open flag mapping table:
310 * POSIX Flag CIFS Disposition
311 * ---------- ----------------
312 * O_CREAT FILE_OPEN_IF
313 * O_CREAT | O_EXCL FILE_CREATE
314 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
315 * O_TRUNC FILE_OVERWRITE
316 * none of the above FILE_OPEN
318 * Note that there is not a direct match between disposition
319 * FILE_SUPERSEDE (ie create whether or not file exists although
320 * O_CREAT | O_TRUNC is similar but truncates the existing
321 * file rather than creating a new file as FILE_SUPERSEDE does
322 * (which uses the attributes / metadata passed in on open call)
324 *? O_SYNC is a reasonable match to CIFS writethrough flag
325 *? and the read write flags match reasonably. O_LARGEFILE
326 *? is irrelevant because largefile support is always used
327 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
328 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
329 *********************************************************************/
331 disposition = cifs_get_disposition(file->f_flags);
333 /* BB pass O_SYNC flag through on file attributes .. BB */
335 /* Also refresh inode by passing in file_info buf returned by SMBOpen
336 and calling get_inode_info with returned buf (at least helps
337 non-Unix server case) */
339 /* BB we can not do this if this is the second open of a file
340 and the first handle has writebehind data, we might be
341 able to simply do a filemap_fdatawrite/filemap_fdatawait first */
342 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
348 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
349 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
350 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
351 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
352 & CIFS_MOUNT_MAP_SPECIAL_CHR);
354 rc = -EIO; /* no NT SMB support fall into legacy open below */
357 /* Old server, try legacy style OpenX */
358 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
359 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
360 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
361 & CIFS_MOUNT_MAP_SPECIAL_CHR);
364 cFYI(1, "cifs_open returned 0x%x", rc);
368 rc = cifs_open_inode_helper(inode, tcon, oplock, buf, full_path, xid);
372 pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
373 file->f_flags, oplock);
374 if (pCifsFile == NULL) {
379 cifs_fscache_set_inode_cookie(inode, file);
381 if (oplock & CIFS_CREATE_ACTION) {
382 /* time to set mode which we can not set earlier due to
383 problems creating new read-only files */
384 if (tcon->unix_ext) {
385 struct cifs_unix_set_info_args args = {
386 .mode = inode->i_mode,
389 .ctime = NO_CHANGE_64,
390 .atime = NO_CHANGE_64,
391 .mtime = NO_CHANGE_64,
394 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
396 cifs_sb->mnt_cifs_flags &
397 CIFS_MOUNT_MAP_SPECIAL_CHR);
408 /* Try to reacquire byte range locks that were released when session */
409 /* to server was lost */
/*
 * NOTE(review): only a TODO is visible in the body, so this looks like a
 * stub that does not yet reacquire any locks - confirm before relying on
 * it. The only caller visible in this file is cifs_reopen_file.
 */
410 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
414 /* BB list all locks open on this file and relock */
/*
 * Reopen a file handle that has been marked invalid (the comments in the
 * body refer to the reconnect path).
 * Under fh_mutex: leaves early if the handle is no longer marked invalid,
 * checks dentry and inode, rebuilds the path, then retries the open - first
 * with cifs_posix_open when the server supports POSIX path operations, then
 * with CIFSSMBOpen using disposition FILE_OPEN. No FILE_ALL_INFO buffer is
 * requested because the server view of the file size may be stale while
 * write-behind data is pending. On success the new netfid is stored and
 * invalidHandle is cleared. Dirty pages are then written back and inode
 * info refetched - presumably only when can_flush is true, the guard is not
 * visible here; the write paths in this file pass false to avoid deadlock.
 * Finally the caching flags are recomputed from the low four bits of the
 * granted oplock and cifs_relock_file is called.
 */
419 static int cifs_reopen_file(struct file *file, bool can_flush)
424 struct cifs_sb_info *cifs_sb;
425 struct cifsTconInfo *tcon;
426 struct cifsFileInfo *pCifsFile;
427 struct cifsInodeInfo *pCifsInode;
429 char *full_path = NULL;
431 int disposition = FILE_OPEN;
434 if (file->private_data)
435 pCifsFile = file->private_data;
440 mutex_lock(&pCifsFile->fh_mutex);
441 if (!pCifsFile->invalidHandle) {
442 mutex_unlock(&pCifsFile->fh_mutex);
448 if (file->f_path.dentry == NULL) {
449 cERROR(1, "no valid name if dentry freed");
452 goto reopen_error_exit;
455 inode = file->f_path.dentry->d_inode;
457 cERROR(1, "inode not valid");
460 goto reopen_error_exit;
463 cifs_sb = CIFS_SB(inode->i_sb);
464 tcon = pCifsFile->tcon;
466 /* can not grab rename sem here because various ops, including
467 those that already have the rename sem can end up causing writepage
468 to get called and if the server was down that means we end up here,
469 and we can never tell if the caller already has the rename_sem */
470 full_path = build_path_from_dentry(file->f_path.dentry);
471 if (full_path == NULL) {
474 mutex_unlock(&pCifsFile->fh_mutex);
479 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
480 inode, file->f_flags, full_path);
487 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
488 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
489 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
490 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
491 /* can not refresh inode info since size could be stale */
492 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
493 cifs_sb->mnt_file_mode /* ignored */,
494 oflags, &oplock, &netfid, xid);
496 cFYI(1, "posix reopen succeeded");
499 /* fallthrough to retry open the old way on errors, especially
500 in the reconnect path it is important to retry hard */
503 desiredAccess = cifs_convert_flags(file->f_flags);
505 /* Can not refresh inode by passing in file_info buf to be returned
506 by SMBOpen and then calling get_inode_info with returned buf
507 since file might have write behind data that needs to be flushed
508 and server version of file size can be stale. If we knew for sure
509 that inode was not dirty locally we could do this */
511 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
512 CREATE_NOT_DIR, &netfid, &oplock, NULL,
513 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
514 CIFS_MOUNT_MAP_SPECIAL_CHR);
516 mutex_unlock(&pCifsFile->fh_mutex);
517 cFYI(1, "cifs_open returned 0x%x", rc);
518 cFYI(1, "oplock: %d", oplock);
521 pCifsFile->netfid = netfid;
522 pCifsFile->invalidHandle = false;
523 mutex_unlock(&pCifsFile->fh_mutex);
524 pCifsInode = CIFS_I(inode);
527 rc = filemap_write_and_wait(inode->i_mapping);
529 CIFS_I(inode)->write_behind_rc = rc;
530 /* temporarily disable caching while we
531 go to server to get inode info */
532 pCifsInode->clientCanCacheAll = false;
533 pCifsInode->clientCanCacheRead = false;
535 rc = cifs_get_inode_info_unix(&inode,
536 full_path, inode->i_sb, xid);
538 rc = cifs_get_inode_info(&inode,
539 full_path, NULL, inode->i_sb,
541 } /* else we are writing out data to server already
542 and could deadlock if we tried to flush data, and
543 since we do not know if we have data that would
544 invalidate the current end of file on the server
545 we can not go to the server to get the new inod
547 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
548 pCifsInode->clientCanCacheAll = true;
549 pCifsInode->clientCanCacheRead = true;
550 cFYI(1, "Exclusive Oplock granted on inode %p",
551 file->f_path.dentry->d_inode);
552 } else if ((oplock & 0xF) == OPLOCK_READ) {
553 pCifsInode->clientCanCacheRead = true;
554 pCifsInode->clientCanCacheAll = false;
556 pCifsInode->clientCanCacheRead = false;
557 pCifsInode->clientCanCacheAll = false;
559 cifs_relock_file(pCifsFile);
/*
 * VFS release entry point for regular files.
 * Marks the handle closePend under GlobalSMBSeslock. If the tcon does not
 * need a reconnect, it waits with a bounded delay (the loop ends once
 * timeout exceeds 2048) for the reference count to drop to 1, so that
 * pending writes can reach the server ahead of the close, and then sends
 * CIFSSMBClose unless the handle is invalid. Stored byte-range lock
 * records are removed from llist under lock_mutex, the handle is taken off
 * its flist and tlist lists and the reference held through private_data is
 * dropped with cifsFileInfo_put. If openFileList of the inode is then
 * empty the client caching flags are cleared. When rc is still 0 a saved
 * write_behind_rc replaces it.
 */
567 int cifs_close(struct inode *inode, struct file *file)
571 struct cifs_sb_info *cifs_sb;
572 struct cifsTconInfo *pTcon;
573 struct cifsFileInfo *pSMBFile = file->private_data;
577 cifs_sb = CIFS_SB(inode->i_sb);
578 pTcon = pSMBFile->tcon;
580 struct cifsLockInfo *li, *tmp;
581 write_lock(&GlobalSMBSeslock);
582 pSMBFile->closePend = true;
584 /* no sense reconnecting to close a file that is
586 if (!pTcon->need_reconnect) {
587 write_unlock(&GlobalSMBSeslock);
589 while ((atomic_read(&pSMBFile->count) != 1)
590 && (timeout <= 2048)) {
591 /* Give write a better chance to get to
592 server ahead of the close. We do not
593 want to add a wait_q here as it would
594 increase the memory utilization as
595 the struct would be in each open file,
596 but this should give enough time to
598 cFYI(DBG2, "close delay, write pending");
602 if (!pTcon->need_reconnect &&
603 !pSMBFile->invalidHandle)
604 rc = CIFSSMBClose(xid, pTcon,
607 write_unlock(&GlobalSMBSeslock);
609 write_unlock(&GlobalSMBSeslock);
611 /* Delete any outstanding lock records.
612 We'll lose them when the file is closed anyway. */
613 mutex_lock(&pSMBFile->lock_mutex);
614 list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) {
615 list_del(&li->llist);
618 mutex_unlock(&pSMBFile->lock_mutex);
620 write_lock(&GlobalSMBSeslock);
621 list_del(&pSMBFile->flist);
622 list_del(&pSMBFile->tlist);
623 write_unlock(&GlobalSMBSeslock);
624 cifsFileInfo_put(file->private_data);
625 file->private_data = NULL;
629 read_lock(&GlobalSMBSeslock);
630 if (list_empty(&(CIFS_I(inode)->openFileList))) {
631 cFYI(1, "closing last open instance for inode %p", inode);
632 /* if the file is not open we do not know if we can cache info
633 on this inode, much less write behind and read ahead */
634 CIFS_I(inode)->clientCanCacheRead = false;
635 CIFS_I(inode)->clientCanCacheAll = false;
637 read_unlock(&GlobalSMBSeslock);
638 if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
639 rc = CIFS_I(inode)->write_behind_rc;
/*
 * VFS release entry point for directories.
 * If a search is still open on the server (end of search not reached and
 * the handle not already invalid) the handle is marked invalid under
 * GlobalSMBSeslock and CIFSFindClose is sent; its result is only logged.
 * A network buffer still attached to the search state is released with
 * cifs_small_buf_release or cifs_buf_release depending on
 * srch_inf.smallBuf, and the private data is freed with kfree.
 */
644 int cifs_closedir(struct inode *inode, struct file *file)
648 struct cifsFileInfo *pCFileStruct = file->private_data;
651 cFYI(1, "Closedir inode = 0x%p", inode);
656 struct cifsTconInfo *pTcon = pCFileStruct->tcon;
658 cFYI(1, "Freeing private data in close dir");
659 write_lock(&GlobalSMBSeslock);
660 if (!pCFileStruct->srch_inf.endOfSearch &&
661 !pCFileStruct->invalidHandle) {
662 pCFileStruct->invalidHandle = true;
663 write_unlock(&GlobalSMBSeslock);
664 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
665 cFYI(1, "Closing uncompleted readdir with rc %d",
667 /* not much we can do if it fails anyway, ignore rc */
670 write_unlock(&GlobalSMBSeslock);
671 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
673 cFYI(1, "closedir free smb buf in srch struct");
674 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
675 if (pCFileStruct->srch_inf.smallBuf)
676 cifs_small_buf_release(ptmp);
678 cifs_buf_release(ptmp);
680 kfree(file->private_data);
681 file->private_data = NULL;
683 /* BB can we lock the filestruct while this is going on? */
/*
 * Record a lock on the per-handle list so that it can be found again when
 * the range is unlocked or the file is closed. Allocates a cifsLockInfo
 * with GFP_KERNEL and adds it to fid->llist under fid->lock_mutex.
 * NOTE(review): the allocation check, the field assignments and the return
 * value are not visible in this listing - confirm that a kmalloc failure
 * is handled before the entry is used.
 */
688 static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
689 __u64 offset, __u8 lockType)
691 struct cifsLockInfo *li =
692 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
698 mutex_lock(&fid->lock_mutex);
699 list_add(&li->llist, &fid->llist);
700 mutex_unlock(&fid->lock_mutex);
/*
 * VFS lock entry point for byte-range locks.
 * The lock length is 1 + fl_end - fl_start. fl_flags and fl_type are
 * decoded into lockType (read and shared locks add
 * LOCKING_ANDX_SHARED_LOCK). The POSIX lock call CIFSSMBPosixLock is used
 * - presumably when the visible capability test succeeds: CAP_UNIX, the
 * fcntl capability and no CIFS_MOUNT_NOPOSIXBRL mount flag - and Windows
 * style CIFSSMBLock otherwise. A lock test is emulated for Windows style
 * locks by taking the lock and immediately releasing it again. Windows
 * style locks that are set are also recorded with store_file_lock; an
 * unlock walks the stored list under lock_mutex and releases every stored
 * lock that the unlock range covers completely. For FL_POSIX requests
 * posix_lock_file_wait is called at the end.
 * NOTE(review): tcon is read through file->private_data before the NULL
 * check on file->private_data that follows it, so that check cannot
 * protect the dereference; the check should come first.
 */
704 int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
710 bool wait_flag = false;
711 struct cifs_sb_info *cifs_sb;
712 struct cifsTconInfo *tcon;
714 __u8 lockType = LOCKING_ANDX_LARGE_FILES;
715 bool posix_locking = 0;
717 length = 1 + pfLock->fl_end - pfLock->fl_start;
721 cFYI(1, "Lock parm: 0x%x flockflags: "
722 "0x%x flocktype: 0x%x start: %lld end: %lld",
723 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
726 if (pfLock->fl_flags & FL_POSIX)
728 if (pfLock->fl_flags & FL_FLOCK)
730 if (pfLock->fl_flags & FL_SLEEP) {
731 cFYI(1, "Blocking lock");
734 if (pfLock->fl_flags & FL_ACCESS)
735 cFYI(1, "Process suspended by mandatory locking - "
736 "not implemented yet");
737 if (pfLock->fl_flags & FL_LEASE)
738 cFYI(1, "Lease on file - not implemented yet");
739 if (pfLock->fl_flags &
740 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
741 cFYI(1, "Unknown lock flags 0x%x", pfLock->fl_flags);
743 if (pfLock->fl_type == F_WRLCK) {
746 } else if (pfLock->fl_type == F_UNLCK) {
749 /* Check if unlock includes more than
751 } else if (pfLock->fl_type == F_RDLCK) {
753 lockType |= LOCKING_ANDX_SHARED_LOCK;
755 } else if (pfLock->fl_type == F_EXLCK) {
758 } else if (pfLock->fl_type == F_SHLCK) {
760 lockType |= LOCKING_ANDX_SHARED_LOCK;
763 cFYI(1, "Unknown type of lock");
765 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
766 tcon = ((struct cifsFileInfo *)file->private_data)->tcon;
768 if (file->private_data == NULL) {
773 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
775 if ((tcon->ses->capabilities & CAP_UNIX) &&
776 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
777 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
779 /* BB add code here to normalize offset and length to
780 account for negative length which we can not accept over the
785 if (lockType & LOCKING_ANDX_SHARED_LOCK)
786 posix_lock_type = CIFS_RDLCK;
788 posix_lock_type = CIFS_WRLCK;
789 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
791 posix_lock_type, wait_flag);
796 /* BB we could chain these into one lock request BB */
797 rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
798 0, 1, lockType, 0 /* wait flag */ );
800 rc = CIFSSMBLock(xid, tcon, netfid, length,
801 pfLock->fl_start, 1 /* numUnlock */ ,
802 0 /* numLock */ , lockType,
804 pfLock->fl_type = F_UNLCK;
806 cERROR(1, "Error unlocking previously locked "
807 "range %d during test of lock", rc);
811 /* if rc == ERR_SHARING_VIOLATION ? */
814 if (lockType & LOCKING_ANDX_SHARED_LOCK) {
815 pfLock->fl_type = F_WRLCK;
817 rc = CIFSSMBLock(xid, tcon, netfid, length,
818 pfLock->fl_start, 0, 1,
819 lockType | LOCKING_ANDX_SHARED_LOCK,
822 rc = CIFSSMBLock(xid, tcon, netfid,
823 length, pfLock->fl_start, 1, 0,
825 LOCKING_ANDX_SHARED_LOCK,
827 pfLock->fl_type = F_RDLCK;
829 cERROR(1, "Error unlocking "
830 "previously locked range %d "
831 "during test of lock", rc);
834 pfLock->fl_type = F_WRLCK;
844 if (!numLock && !numUnlock) {
845 /* if no lock or unlock then nothing
846 to do since we do not know what it is */
853 if (lockType & LOCKING_ANDX_SHARED_LOCK)
854 posix_lock_type = CIFS_RDLCK;
856 posix_lock_type = CIFS_WRLCK;
859 posix_lock_type = CIFS_UNLCK;
861 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
863 posix_lock_type, wait_flag);
865 struct cifsFileInfo *fid = file->private_data;
868 rc = CIFSSMBLock(xid, tcon, netfid, length,
870 0, numLock, lockType, wait_flag);
873 /* For Windows locks we must store them. */
874 rc = store_file_lock(fid, length,
875 pfLock->fl_start, lockType);
877 } else if (numUnlock) {
878 /* For each stored lock that this unlock overlaps
879 completely, unlock it. */
881 struct cifsLockInfo *li, *tmp;
884 mutex_lock(&fid->lock_mutex);
885 list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
886 if (pfLock->fl_start <= li->offset &&
887 (pfLock->fl_start + length) >=
888 (li->offset + li->length)) {
889 stored_rc = CIFSSMBLock(xid, tcon,
891 li->length, li->offset,
892 1, 0, li->type, false);
896 list_del(&li->llist);
901 mutex_unlock(&fid->lock_mutex);
905 if (pfLock->fl_flags & FL_POSIX)
906 posix_lock_file_wait(file, pfLock);
912 * Set the timeout on write requests past EOF. For some servers (Windows)
913 * these calls can be very long.
915 * If we're writing >10M past the EOF we give a 180s timeout. Anything less
916 * than that gets a 45s timeout. Writes not past EOF get 15s timeouts.
917 * The 10M cutoff is totally arbitrary. A better scheme for this would be
918 * welcome if someone wants to suggest one.
920 * We may be able to do a better job with this if there were some way to
921 * declare that a file should be sparse.
/*
 * Pick the timeout class for a write that starts at offset, relative to
 * the cached server EOF: a write that does not start past server_eof takes
 * the first branch, and a write more than 10 * 1024 * 1024 bytes past it
 * returns CIFS_VLONG_OP. The values returned by the first branch and by
 * the final fallback are not visible in this listing.
 */
924 cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset)
926 if (offset <= cifsi->server_eof)
928 else if (offset > (cifsi->server_eof + (10 * 1024 * 1024)))
929 return CIFS_VLONG_OP;
/*
 * Advance the cached server EOF (cifsi->server_eof) when a write ended
 * beyond it. The end of the write is offset + bytes_written; the cached
 * value is only ever raised here, never lowered.
 */
934 /* update the file size (if needed) after a write */
936 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
937 unsigned int bytes_written)
939 loff_t end_of_write = offset + bytes_written;
941 if (end_of_write > cifsi->server_eof)
942 cifsi->server_eof = end_of_write;
/*
 * Write data from a user-space buffer to the server.
 * After generic_write_checks the data is sent with CIFSSMBWrite in chunks
 * of at most cifs_sb->wsize bytes. Each chunk is retried while rc is
 * -EAGAIN; an invalidated handle is reopened with cifs_reopen_file without
 * flushing, and the function returns the byte count written so far if the
 * file was closed or has a close pending. The timeout class from
 * cifs_write_timeout is used first and is reset to CIFS_STD_OP for later
 * chunks. After each successful chunk the cached server EOF and *poffset
 * are advanced. At the end, if anything was written, i_size is raised
 * under i_lock when *poffset passed it, and the inode is marked dirty.
 * Returns total_written on the paths visible here; the error return for a
 * failed first chunk is not visible in this listing.
 */
945 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
946 size_t write_size, loff_t *poffset)
949 unsigned int bytes_written = 0;
950 unsigned int total_written;
951 struct cifs_sb_info *cifs_sb;
952 struct cifsTconInfo *pTcon;
954 struct cifsFileInfo *open_file;
955 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
957 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
959 /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
960 *poffset, file->f_path.dentry->d_name.name); */
962 if (file->private_data == NULL)
965 open_file = file->private_data;
966 pTcon = open_file->tcon;
968 rc = generic_write_checks(file, poffset, &write_size, 0);
974 long_op = cifs_write_timeout(cifsi, *poffset);
975 for (total_written = 0; write_size > total_written;
976 total_written += bytes_written) {
978 while (rc == -EAGAIN) {
979 if (file->private_data == NULL) {
980 /* file has been closed on us */
982 /* if we have gotten here we have written some data
983 and blocked, and the file has been freed on us while
984 we blocked so return what we managed to write */
985 return total_written;
987 if (open_file->closePend) {
990 return total_written;
994 if (open_file->invalidHandle) {
995 /* we could deadlock if we called
996 filemap_fdatawait from here so tell
997 reopen_file not to flush data to server
999 rc = cifs_reopen_file(file, false);
1004 rc = CIFSSMBWrite(xid, pTcon,
1006 min_t(const int, cifs_sb->wsize,
1007 write_size - total_written),
1008 *poffset, &bytes_written,
1009 NULL, write_data + total_written, long_op);
1011 if (rc || (bytes_written == 0)) {
1019 cifs_update_eof(cifsi, *poffset, bytes_written);
1020 *poffset += bytes_written;
1022 long_op = CIFS_STD_OP; /* subsequent writes fast -
1023 15 seconds is plenty */
1026 cifs_stats_bytes_written(pTcon, total_written);
1028 /* since the write may have blocked check these pointers again */
1029 if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1030 struct inode *inode = file->f_path.dentry->d_inode;
1031 /* Do not update local mtime - server will set its actual value on write
1032 * inode->i_ctime = inode->i_mtime =
1033 * current_fs_time(inode->i_sb);*/
1034 if (total_written > 0) {
1035 spin_lock(&inode->i_lock);
1036 if (*poffset > file->f_path.dentry->d_inode->i_size)
1037 i_size_write(file->f_path.dentry->d_inode,
1039 spin_unlock(&inode->i_lock);
1041 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1044 return total_written;
/*
 * Write data from a kernel buffer to the server; cifs_partialpagewrite in
 * this file calls it for page cache data.
 * The data is sent in chunks of at most cifs_sb->wsize bytes. Each chunk
 * is retried while rc is -EAGAIN; an invalidated handle is reopened with
 * cifs_reopen_file without flushing, and the function returns the byte
 * count written so far if the file was closed or has a close pending.
 * A chunk goes out with CIFSSMBWrite2 (payload in iov[1], iov[0] being
 * reserved for the SMB header) or with CIFSSMBWrite; the choice depends on
 * experimEnabled and the signing bits of the server secMode - the full
 * condition is not visible in this listing. The timeout class from
 * cifs_write_timeout is used first and is reset to CIFS_STD_OP for later
 * chunks. After each successful chunk the cached server EOF and *poffset
 * are advanced. At the end, if anything was written, i_size is raised
 * under i_lock when *poffset passed it, and the inode is marked dirty.
 * NOTE(review): unlike cifs_user_write no generic_write_checks call is
 * visible here - presumably callers are trusted; confirm.
 */
1047 static ssize_t cifs_write(struct file *file, const char *write_data,
1048 size_t write_size, loff_t *poffset)
1051 unsigned int bytes_written = 0;
1052 unsigned int total_written;
1053 struct cifs_sb_info *cifs_sb;
1054 struct cifsTconInfo *pTcon;
1056 struct cifsFileInfo *open_file;
1057 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
1059 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1061 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1062 *poffset, file->f_path.dentry->d_name.name);
1064 if (file->private_data == NULL)
1066 open_file = file->private_data;
1067 pTcon = open_file->tcon;
1071 long_op = cifs_write_timeout(cifsi, *poffset);
1072 for (total_written = 0; write_size > total_written;
1073 total_written += bytes_written) {
1075 while (rc == -EAGAIN) {
1076 if (file->private_data == NULL) {
1077 /* file has been closed on us */
1079 /* if we have gotten here we have written some data
1080 and blocked, and the file has been freed on us
1081 while we blocked so return what we managed to
1083 return total_written;
1085 if (open_file->closePend) {
1088 return total_written;
1092 if (open_file->invalidHandle) {
1093 /* we could deadlock if we called
1094 filemap_fdatawait from here so tell
1095 reopen_file not to flush data to
1097 rc = cifs_reopen_file(file, false);
1101 if (experimEnabled || (pTcon->ses->server &&
1102 ((pTcon->ses->server->secMode &
1103 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1108 len = min((size_t)cifs_sb->wsize,
1109 write_size - total_written);
1110 /* iov[0] is reserved for smb header */
1111 iov[1].iov_base = (char *)write_data +
1113 iov[1].iov_len = len;
1114 rc = CIFSSMBWrite2(xid, pTcon,
1115 open_file->netfid, len,
1116 *poffset, &bytes_written,
1119 rc = CIFSSMBWrite(xid, pTcon,
1121 min_t(const int, cifs_sb->wsize,
1122 write_size - total_written),
1123 *poffset, &bytes_written,
1124 write_data + total_written,
1127 if (rc || (bytes_written == 0)) {
1135 cifs_update_eof(cifsi, *poffset, bytes_written);
1136 *poffset += bytes_written;
1138 long_op = CIFS_STD_OP; /* subsequent writes fast -
1139 15 seconds is plenty */
1142 cifs_stats_bytes_written(pTcon, total_written);
1144 /* since the write may have blocked check these pointers again */
1145 if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1146 /*BB We could make this contingent on superblock ATIME flag too */
1147 /* file->f_path.dentry->d_inode->i_ctime =
1148 file->f_path.dentry->d_inode->i_mtime = CURRENT_TIME;*/
1149 if (total_written > 0) {
1150 spin_lock(&file->f_path.dentry->d_inode->i_lock);
1151 if (*poffset > file->f_path.dentry->d_inode->i_size)
1152 i_size_write(file->f_path.dentry->d_inode,
1154 spin_unlock(&file->f_path.dentry->d_inode->i_lock);
1156 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1159 return total_written;
1162 #ifdef CONFIG_CIFS_EXPERIMENTAL
/*
 * Find an open handle on the inode that can be used for reading.
 * Walks openFileList under GlobalSMBSeslock for reading. Handles with a
 * close pending are tested for first (presumably skipped - that statement
 * is not visible here). For the first readable handle that is still valid
 * a reference is taken with cifsFileInfo_get before the lock is dropped.
 * The walk stops at the first entry that does not pass the readable test,
 * on the stated assumption that write-only handles sit at the end of the
 * list. The return statements are not visible in this listing.
 * NOTE(review): O_RDONLY is defined as 0 on Linux, so the test
 * (f_flags & O_RDONLY) can never be true and a read-only handle falls into
 * the write-only branch; comparing (f_flags & O_ACCMODE) with O_RDONLY
 * would express the intent. Likewise (f_flags & O_RDWR) tests a single bit
 * rather than the access mode field.
 */
1163 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
1165 struct cifsFileInfo *open_file = NULL;
1167 read_lock(&GlobalSMBSeslock);
1168 /* we could simply get the first_list_entry since write-only entries
1169 are always at the end of the list but since the first entry might
1170 have a close pending, we go through the whole list */
1171 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1172 if (open_file->closePend)
1174 if (open_file->pfile && ((open_file->pfile->f_flags & O_RDWR) ||
1175 (open_file->pfile->f_flags & O_RDONLY))) {
1176 if (!open_file->invalidHandle) {
1177 /* found a good file */
1178 /* lock it so it will not be closed on us */
1179 cifsFileInfo_get(open_file);
1180 read_unlock(&GlobalSMBSeslock);
1182 } /* else might as well continue, and look for
1183 another, or simply have the caller reopen it
1184 again rather than trying to fix this handle */
1185 } else /* write only file */
1186 break; /* write only files are last so must be done */
1188 read_unlock(&GlobalSMBSeslock);
/*
 * Find an open handle on the inode that can be used for writing, reopening
 * an invalidated one if necessary.
 * A NULL cifs_inode is reported with cERROR. The list walk runs under
 * GlobalSMBSeslock for reading. On the first pass only handles whose pid
 * equals current->tgid are considered; if none is usable any_available is
 * set and the walk is repeated accepting any handle. A reference is taken
 * with cifsFileInfo_get before the lock is dropped for the blocking
 * cifs_reopen_file call. The walk restarts from the top whenever the
 * handle turns out to have a close pending, since the list may have
 * changed; after a failed reopen without a pending close it simply moves
 * on to the next entry. The return statements are not visible in this
 * listing.
 * NOTE(review): after a failed reopen the reference is dropped with
 * cifsFileInfo_put and open_file->closePend is read afterwards - verify
 * that the entry cannot have been freed by that put.
 */
1193 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1195 struct cifsFileInfo *open_file;
1196 bool any_available = false;
1199 /* Having a null inode here (because mapping->host was set to zero by
1200 the VFS or MM) should not happen but we had reports of an oops (due to
1201 it being zero) during stress testcases so we need to check for it */
1203 if (cifs_inode == NULL) {
1204 cERROR(1, "Null inode passed to cifs_writeable_file");
1209 read_lock(&GlobalSMBSeslock);
1211 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1212 if (open_file->closePend ||
1213 (!any_available && open_file->pid != current->tgid))
1216 if (open_file->pfile &&
1217 ((open_file->pfile->f_flags & O_RDWR) ||
1218 (open_file->pfile->f_flags & O_WRONLY))) {
1219 cifsFileInfo_get(open_file);
1221 if (!open_file->invalidHandle) {
1222 /* found a good writable file */
1223 read_unlock(&GlobalSMBSeslock);
1227 read_unlock(&GlobalSMBSeslock);
1228 /* Had to unlock since following call can block */
1229 rc = cifs_reopen_file(open_file->pfile, false);
1231 if (!open_file->closePend)
1233 else { /* start over in case this was deleted */
1234 /* since the list could be modified */
1235 read_lock(&GlobalSMBSeslock);
1236 cifsFileInfo_put(open_file);
1237 goto refind_writable;
1241 /* if it fails, try another handle if possible -
1242 (we can not do this if closePending since
1243 loop could be modified - in which case we
1244 have to start at the beginning of the list
1245 again. Note that it would be bad
1246 to hold up writepages here (rather than
1247 in caller) with continuous retries */
1248 cFYI(1, "wp failed on reopen file");
1249 read_lock(&GlobalSMBSeslock);
1250 /* can not use this handle, no write
1251 pending on this one after all */
1252 cifsFileInfo_put(open_file);
1254 if (open_file->closePend) /* list could have changed */
1255 goto refind_writable;
1256 /* else we simply continue to the next entry. Thus
1257 we do not loop on reopen errors. If we
1258 can not reopen the file, for example if we
1259 reconnected to a server with another client
1260 racing to delete or lock the file we would not
1261 make progress if we restarted before the beginning
1262 of the loop here. */
1265 /* couldn't find useable FH with same pid, try any available */
1266 if (!any_available) {
1267 any_available = true;
1268 goto refind_writable;
1270 read_unlock(&GlobalSMBSeslock);
/*
 * cifs_partialpagewrite - send part of a page cache page to the server.
 *
 * The file offset is derived from page->index and @from, and the data is
 * taken from the kmap()ed page.  The range is checked against
 * PAGE_CACHE_SIZE and against the current i_size so that the write does
 * not extend the file; when the start offset already lies beyond i_size
 * (truncate race) nothing is written and 0 is returned.  The write goes
 * through cifs_write() on a handle obtained from find_writable_file(),
 * and the reference on that handle is dropped with cifsFileInfo_put()
 * afterwards.  The inode access and modification times are set to the
 * current time after the write.
 *
 * @page: page cache page holding the data
 * @from: first byte within the page to write
 * @to:   upper bound of the range within the page, validated against
 *        PAGE_CACHE_SIZE and @from
 */
1274 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1276 struct address_space *mapping = page->mapping;
1277 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1280 int bytes_written = 0;
1281 struct cifs_sb_info *cifs_sb;
1282 struct inode *inode;
1283 struct cifsFileInfo *open_file;
1285 if (!mapping || !mapping->host)
1288 inode = page->mapping->host;
1289 cifs_sb = CIFS_SB(inode->i_sb);
	/* from here on offset is the absolute file position of the data */
1291 offset += (loff_t)from;
1292 write_data = kmap(page);
	/* reject a range that does not fit inside a single page */
1295 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1300 /* racing with truncate? */
1301 if (offset > mapping->host->i_size) {
1303 return 0; /* don't care */
1306 /* check to make sure that we are not extending the file */
1307 if (mapping->host->i_size - offset < (loff_t)to)
1308 to = (unsigned)(mapping->host->i_size - offset);
	/* any writable handle on the inode will do for this write */
1310 open_file = find_writable_file(CIFS_I(mapping->host));
1312 bytes_written = cifs_write(open_file->pfile, write_data,
1314 cifsFileInfo_put(open_file);
1315 /* Does mm or vfs already set times? */
1316 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1317 if ((bytes_written > 0) && (offset))
1319 else if (bytes_written < 0)
1322 cFYI(1, "No writeable filehandles for inode");
/*
 * cifs_writepages - write back dirty pages of a mapping using multi-page
 * write requests.
 *
 * Dirty pages are looked up with pagevec_lookup_tag().  Consecutive pages
 * are kmap()ed into a kvec array (iov[0] is reserved for the SMB header)
 * and sent together with CIFSSMBWrite2() on a handle obtained from
 * find_writable_file(); that handle is released with cifsFileInfo_put()
 * after each request.  A run of pages is limited by cifs_sb->wsize.
 * The work is handed to generic_writepages() instead when wsize is
 * smaller than a page, when signing is enabled or required on the
 * session and experimEnabled is not set, and on the path that follows
 * the kmalloc() of the kvec array (presumably its failure case - TODO
 * confirm).
 *
 * @mapping: address space whose dirty pages are written
 * @wbc:     writeback control; range_cyclic, range_start, range_end,
 *           sync_mode, nonblocking and nr_to_write are consulted here
 */
1330 static int cifs_writepages(struct address_space *mapping,
1331 struct writeback_control *wbc)
1333 struct backing_dev_info *bdi = mapping->backing_dev_info;
1334 unsigned int bytes_to_write;
1335 unsigned int bytes_written;
1336 struct cifs_sb_info *cifs_sb;
1340 int range_whole = 0;
1347 struct cifsFileInfo *open_file;
1348 struct cifsTconInfo *tcon;
1349 struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
1351 struct pagevec pvec;
1356 cifs_sb = CIFS_SB(mapping->host->i_sb);
1359 * If wsize is smaller that the page cache size, default to writing
1360 * one page at a time via cifs_writepage
1362 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1363 return generic_writepages(mapping, wbc);
	/* signing on the session also selects the generic path unless
	   experimEnabled is set */
1365 if ((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
1366 if (cifs_sb->tcon->ses->server->secMode &
1367 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1368 if (!experimEnabled)
1369 return generic_writepages(mapping, wbc);
	/* the kvec array has room for 32 entries */
1371 iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
1373 return generic_writepages(mapping, wbc);
1377 * BB: Is this meaningful for a non-block-device file system?
1378 * If it is, we should test it again after we do I/O
1380 if (wbc->nonblocking && bdi_write_congested(bdi)) {
1381 wbc->encountered_congestion = 1;
1388 pagevec_init(&pvec, 0);
1389 if (wbc->range_cyclic) {
1390 index = mapping->writeback_index; /* Start from prev offset */
1393 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1394 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1395 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
	/* fetch the next batch of dirty-tagged pages, asking for no more
	   than the pages that remain up to end */
1400 while (!done && (index <= end) &&
1401 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1402 PAGECACHE_TAG_DIRTY,
1403 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
1412 for (i = 0; i < nr_pages; i++) {
1413 page = pvec.pages[i];
1415 * At this point we hold neither mapping->tree_lock nor
1416 * lock on the page itself: the page may be truncated or
1417 * invalidated (changing page->mapping to NULL), or even
1418 * swizzled back from swapper_space to tmpfs file
1424 else if (!trylock_page(page))
1427 if (unlikely(page->mapping != mapping)) {
1432 if (!wbc->range_cyclic && page->index > end) {
1438 if (next && (page->index != next)) {
1439 /* Not next consecutive page */
1444 if (wbc->sync_mode != WB_SYNC_NONE)
1445 wait_on_page_writeback(page);
1447 if (PageWriteback(page) ||
1448 !clear_page_dirty_for_io(page)) {
1454 * This actually clears the dirty bit in the radix tree.
1455 * See cifs_writepage() for more commentary.
1457 set_page_writeback(page);
1459 if (page_offset(page) >= mapping->host->i_size) {
1462 end_page_writeback(page);
1467 * BB can we get rid of this? pages are held by pvec
1469 page_cache_get(page);
	/* the last page of the file contributes only the bytes up to
	   i_size */
1471 len = min(mapping->host->i_size - page_offset(page),
1472 (loff_t)PAGE_CACHE_SIZE);
1474 /* reserve iov[0] for the smb header */
1476 iov[n_iov].iov_base = kmap(page);
1477 iov[n_iov].iov_len = len;
1478 bytes_to_write += len;
1482 offset = page_offset(page);
1484 next = page->index + 1;
	/* a further full page would no longer fit within wsize */
1485 if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
1489 /* Search for a writable handle every time we call
1490 * CIFSSMBWrite2. We can't rely on the last handle
1491 * we used to still be valid
1493 open_file = find_writable_file(CIFS_I(mapping->host));
1495 cERROR(1, "No writable handles for inode");
1498 tcon = open_file->tcon;
1499 long_op = cifs_write_timeout(cifsi, offset);
1500 rc = CIFSSMBWrite2(xid, tcon,
1502 bytes_to_write, offset,
1503 &bytes_written, iov, n_iov,
1505 cifsFileInfo_put(open_file);
1506 cifs_update_eof(cifsi, offset, bytes_written);
1508 if (rc || bytes_written < bytes_to_write) {
1509 cERROR(1, "Write2 ret %d, wrote %d",
1511 /* BB what if continued retry is
1512 requested via mount flags? */
	/* the failure is remembered on the mapping as AS_ENOSPC or
	   AS_EIO */
1514 set_bit(AS_ENOSPC, &mapping->flags);
1516 set_bit(AS_EIO, &mapping->flags);
1518 cifs_stats_bytes_written(tcon, bytes_written);
	/* finish every page that was part of this request */
1521 for (i = 0; i < n_iov; i++) {
1522 page = pvec.pages[first + i];
1523 /* Should we also set page error on
1524 success rc but too little data written? */
1525 /* BB investigate retry logic on temporary
1526 server crash cases and how recovery works
1527 when page marked as error */
1532 end_page_writeback(page);
1533 page_cache_release(page);
	/* charge the pages of this request against the writeback budget */
1535 if ((wbc->nr_to_write -= n_iov) <= 0)
1539 /* Need to re-find the pages we skipped */
1540 index = pvec.pages[0]->index + 1;
1542 pagevec_release(&pvec);
1544 if (!scanned && !done) {
1546 * We hit the last page and there is more work to be done: wrap
1547 * back to the start of the file
	/* remember where to resume for the next cyclic writeback pass */
1553 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1554 mapping->writeback_index = index;
/*
 * cifs_writepage - write a single page cache page to the server.
 *
 * Takes an extra reference on the page, marks it as under writeback and
 * writes the whole page (0 to PAGE_CACHE_SIZE) with
 * cifs_partialpagewrite().  The page is then flagged up to date,
 * writeback is ended and the extra reference is dropped.
 *
 * @page: page to write
 * @wbc:  writeback control; not consulted in the code shown here (see
 *        the BB remark below)
 */
1561 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1567 /* BB add check for wbc flags */
1568 page_cache_get(page);
1569 if (!PageUptodate(page))
1570 cFYI(1, "ppw - page not up to date");
1573 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1575 * A writepage() implementation always needs to do either this,
1576 * or re-dirty the page with "redirty_page_for_writepage()" in
1577 * the case of a failure.
1579 * Just unlocking the page will cause the radix tree tag-bits
1580 * to fail to update with the state of the page correctly.
1582 set_page_writeback(page);
1583 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1584 SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
1586 end_page_writeback(page);
1587 page_cache_release(page);
/*
 * cifs_write_end - finish a buffered write that was set up by
 * cifs_write_begin().
 *
 * A page flagged PageChecked has that flag cleared and can be marked up
 * to date here; so can a page that was not up to date but had a full
 * PAGE_CACHE_SIZE copied into it.  When the page is still not up to
 * date, the copied bytes are written to the server straight away with
 * cifs_write() from the kmap()ed page; set_page_dirty() is used on the
 * other path (NOTE(review): confirm the exact branch structure).
 * Finally i_size is raised under inode->i_lock when pos lies beyond it,
 * and the page reference is released.
 *
 * @file:    file being written
 * @mapping: address space of the file
 * @pos:     file position of the write
 * @len:     length that was requested in write_begin
 * @copied:  number of bytes actually copied into the page
 * @page:    page that received the data
 * @fsdata:  not used in the code shown here
 */
1592 static int cifs_write_end(struct file *file, struct address_space *mapping,
1593 loff_t pos, unsigned len, unsigned copied,
1594 struct page *page, void *fsdata)
1597 struct inode *inode = mapping->host;
1599 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
1602 if (PageChecked(page)) {
1604 SetPageUptodate(page);
1605 ClearPageChecked(page);
1606 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1607 SetPageUptodate(page);
1609 if (!PageUptodate(page)) {
	/* position of the copied data inside the page */
1611 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1615 /* this is probably better than directly calling
1616 partialpage_write since in this function the file handle is
1617 known which we might as well leverage */
1618 /* BB check if anything else missing out of ppw
1619 such as updating last write time */
1620 page_data = kmap(page);
1621 rc = cifs_write(file, page_data + offset, copied, &pos);
1622 /* if (rc < 0) should we set writebehind rc? */
1629 set_page_dirty(page);
	/* grow the cached file size if the write went past it */
1633 spin_lock(&inode->i_lock);
1634 if (pos > inode->i_size)
1635 i_size_write(inode, pos);
1636 spin_unlock(&inode->i_lock);
1640 page_cache_release(page);
/*
 * cifs_fsync - flush the cached data of a file and ask the server to
 * commit it.
 *
 * Dirty pages are written and waited for with filemap_write_and_wait().
 * The write_behind_rc saved on the inode is picked up and reset to zero.
 * When no error is pending, CIFSSMBFlush() is issued for the netfid of
 * the file unless the mount has CIFS_MOUNT_NOSSYNC set.
 *
 * @file:     open file being synced
 * @datasync: only logged in the code shown here
 */
1645 int cifs_fsync(struct file *file, int datasync)
1649 struct cifsTconInfo *tcon;
1650 struct cifsFileInfo *smbfile = file->private_data;
1651 struct inode *inode = file->f_path.dentry->d_inode;
1655 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1656 file->f_path.dentry->d_name.name, datasync);
1658 rc = filemap_write_and_wait(inode->i_mapping);
1660 rc = CIFS_I(inode)->write_behind_rc;
1661 CIFS_I(inode)->write_behind_rc = 0;
	/*
	 * NOTE(review): smbfile is dereferenced on the next line before
	 * the smbfile test in the condition that follows it - the test
	 * cannot protect this access.
	 */
1662 tcon = smbfile->tcon;
1663 if (!rc && tcon && smbfile &&
1664 !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1665 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1672 /* static void cifs_sync_page(struct page *page)
1674 struct address_space *mapping;
1675 struct inode *inode;
1676 unsigned long index = page->index;
1677 unsigned int rpages = 0;
1680 cFYI(1, "sync page %p", page);
1681 mapping = page->mapping;
1684 inode = mapping->host;
1688 /* fill in rpages then
1689 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1691 /* cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index);
1701 * As file closes, flush all cached write data for this inode checking
1702 * for write behind errors.
1704 int cifs_flush(struct file *file, fl_owner_t id)
1706 struct inode *inode = file->f_path.dentry->d_inode;
1709 /* Rather than do the steps manually:
1710 lock the inode for writing
1711 loop through pages looking for write behind data (dirty pages)
1712 coalesce into contiguous 16K (or smaller) chunks to write to server
1713 send to server (prefer in parallel)
1714 deal with writebehind errors
1715 unlock inode for writing
1716 filemapfdatawrite appears easier for the time being */
	/*
	 * Writeback of the dirty pages of the inode is started with
	 * filemap_fdatawrite(); no wait for completion is visible in this
	 * function.  The id argument is not used in the code shown here.
	 */
1718 rc = filemap_fdatawrite(inode->i_mapping);
1719 /* reset wb rc if we were able to write out dirty pages */
	/* the saved write behind error becomes the result and is cleared
	   on the inode */
1721 rc = CIFS_I(inode)->write_behind_rc;
1722 CIFS_I(inode)->write_behind_rc = 0;
1725 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
/*
 * cifs_user_read - read file data from the server into a user space
 * buffer.
 *
 * The request is processed in chunks.  Each chunk is fetched with
 * CIFSSMBRead(), which is retried while it returns -EAGAIN (an
 * invalidated handle that is not closing is reopened first), and is then
 * copied out with copy_to_user() from the response buffer, starting
 * 4 bytes (RFC1001 length field) plus DataOffset into it.  The response
 * buffer is released according to buf_type.  After a successful chunk
 * the read statistics and *poffset advance by bytes_read.
 *
 * @file:      open file; a NULL file->private_data is checked for
 * @read_data: destination buffer in user space
 * @read_size: number of bytes requested
 * @poffset:   file position, advanced as data is read
 */
1730 ssize_t cifs_user_read(struct file *file, char __user *read_data,
1731 size_t read_size, loff_t *poffset)
1734 unsigned int bytes_read = 0;
1735 unsigned int total_read = 0;
1736 unsigned int current_read_size;
1737 struct cifs_sb_info *cifs_sb;
1738 struct cifsTconInfo *pTcon;
1740 struct cifsFileInfo *open_file;
1741 char *smb_read_data;
1742 char __user *current_offset;
1743 struct smb_com_read_rsp *pSMBr;
1746 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1748 if (file->private_data == NULL) {
1753 open_file = file->private_data;
1754 pTcon = open_file->tcon;
	/* a read on a write only open is logged but not refused here */
1756 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1757 cFYI(1, "attempting read on write only file instance");
	/* one iteration per chunk; the user pointer moves on by the bytes
	   read in each chunk */
1759 for (total_read = 0, current_offset = read_data;
1760 read_size > total_read;
1761 total_read += bytes_read, current_offset += bytes_read) {
1762 current_read_size = min_t(const int, read_size - total_read,
1765 smb_read_data = NULL;
1766 while (rc == -EAGAIN) {
1767 int buf_type = CIFS_NO_BUFFER;
	/* a handle that went stale is reopened before the read is sent */
1768 if ((open_file->invalidHandle) &&
1769 (!open_file->closePend)) {
1770 rc = cifs_reopen_file(file, true);
1774 rc = CIFSSMBRead(xid, pTcon,
1776 current_read_size, *poffset,
1777 &bytes_read, &smb_read_data,
1779 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1780 if (smb_read_data) {
1781 if (copy_to_user(current_offset,
1783 4 /* RFC1001 length field */ +
1784 le16_to_cpu(pSMBr->DataOffset),
	/* hand the response buffer back to the pool it came from */
1788 if (buf_type == CIFS_SMALL_BUFFER)
1789 cifs_small_buf_release(smb_read_data);
1790 else if (buf_type == CIFS_LARGE_BUFFER)
1791 cifs_buf_release(smb_read_data);
1792 smb_read_data = NULL;
1795 if (rc || (bytes_read == 0)) {
1803 cifs_stats_bytes_read(pTcon, bytes_read);
1804 *poffset += bytes_read;
/*
 * cifs_read - read file data from the server into a kernel buffer.
 *
 * Same chunked loop as cifs_user_read(), but the data is received
 * through the pointer argument of CIFSSMBRead() instead of being copied
 * out with copy_to_user().  When the session lacks CAP_LARGE_FILES the
 * chunk size is additionally limited to server->maxBuf - 128 (see the
 * remark about older Windows servers below).  CIFSSMBRead() is retried
 * while it returns -EAGAIN, reopening an invalidated handle first.
 *
 * @file:      open file; a NULL file->private_data is checked for
 * @read_data: destination buffer (kernel memory)
 * @read_size: number of bytes requested
 */
1812 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1816 unsigned int bytes_read = 0;
1817 unsigned int total_read;
1818 unsigned int current_read_size;
1819 struct cifs_sb_info *cifs_sb;
1820 struct cifsTconInfo *pTcon;
1822 char *current_offset;
1823 struct cifsFileInfo *open_file;
1824 int buf_type = CIFS_NO_BUFFER;
1827 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1829 if (file->private_data == NULL) {
1834 open_file = file->private_data;
1835 pTcon = open_file->tcon;
1837 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1838 cFYI(1, "attempting read on write only file instance");
1840 for (total_read = 0, current_offset = read_data;
1841 read_size > total_read;
1842 total_read += bytes_read, current_offset += bytes_read) {
1843 current_read_size = min_t(const int, read_size - total_read,
1845 /* For windows me and 9x we do not want to request more
1846 than it negotiated since it will refuse the read then */
1848 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1849 current_read_size = min_t(const int, current_read_size,
1850 pTcon->ses->server->maxBuf - 128);
1853 while (rc == -EAGAIN) {
1854 if ((open_file->invalidHandle) &&
1855 (!open_file->closePend)) {
1856 rc = cifs_reopen_file(file, true);
	/*
	 * NOTE(review): in the call below, the argument that follows
	 * &bytes_read reads as a mis-encoded character followed by
	 * t_offset; it is presumably the address of current_offset -
	 * confirm against a clean copy of this file.
	 */
1860 rc = CIFSSMBRead(xid, pTcon,
1862 current_read_size, *poffset,
1863 &bytes_read, ¤t_offset,
1866 if (rc || (bytes_read == 0)) {
	/*
	 * NOTE(review): the statistics call below is passed total_read
	 * whereas cifs_user_read() passes bytes_read - confirm which
	 * value is intended.
	 */
1874 cifs_stats_bytes_read(pTcon, total_read);
1875 *poffset += bytes_read;
/*
 * cifs_file_mmap - mmap handler for cifs files.
 *
 * The file is revalidated with cifs_revalidate_file() first (a failure
 * is logged with its error code), and the mapping itself is set up by
 * generic_file_mmap().
 *
 * @file: file being mapped
 * @vma:  memory area describing the mapping
 */
1882 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1887 rc = cifs_revalidate_file(file);
1889 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
1893 rc = generic_file_mmap(file, vma);
/*
 * cifs_copy_cache_pages - copy data received from the server into page
 * cache pages.
 *
 * While bytes remain, a page is taken from the tail of the list and
 * added to the page cache with add_to_page_cache_lru().  If that fails
 * the page is released and one page worth of data is skipped.
 * Otherwise the data is copied in through a kmap_atomic() mapping, a
 * final partial page has its remainder zeroed, and the page is flushed,
 * marked up to date and passed to cifs_readpage_to_fscache().
 *
 * @mapping:    address space the pages are added to
 * @pages:      list of pages to fill, linked through page->lru
 * @bytes_read: number of valid bytes at @data
 * @data:       start of the received file data
 */
1899 static void cifs_copy_cache_pages(struct address_space *mapping,
1900 struct list_head *pages, int bytes_read, char *data)
1905 while (bytes_read > 0) {
1906 if (list_empty(pages))
	/* pages are consumed from the tail of the list */
1909 page = list_entry(pages->prev, struct page, lru);
1910 list_del(&page->lru);
1912 if (add_to_page_cache_lru(page, mapping, page->index,
1914 page_cache_release(page);
1915 cFYI(1, "Add page cache failed");
	/* step over the data that belonged to this page */
1916 data += PAGE_CACHE_SIZE;
1917 bytes_read -= PAGE_CACHE_SIZE;
1920 page_cache_release(page);
1922 target = kmap_atomic(page, KM_USER0);
1924 if (PAGE_CACHE_SIZE > bytes_read) {
1925 memcpy(target, data, bytes_read);
1926 /* zero the tail end of this partial page */
1927 memset(target + bytes_read, 0,
1928 PAGE_CACHE_SIZE - bytes_read);
1931 memcpy(target, data, PAGE_CACHE_SIZE);
1932 bytes_read -= PAGE_CACHE_SIZE;
1934 kunmap_atomic(target, KM_USER0);
1936 flush_dcache_page(page);
1937 SetPageUptodate(page);
1939 data += PAGE_CACHE_SIZE;
1941 /* add page to FS-Cache */
1942 cifs_readpage_to_fscache(mapping->host, page);
/*
 * cifs_readpages - read several pages in one or more server requests.
 *
 * cifs_readpages_from_fscache() is tried first.  The remaining pages on
 * page_list are then handled in runs: starting from the tail of the
 * list, pages with consecutive indexes are counted, the request size is
 * that many pages but no more than cifs_sb->rsize rounded down to a
 * page multiple, and the data is fetched with CIFSSMBRead() (retried
 * while it returns -EAGAIN, reopening an invalidated handle first).
 * Received data is placed into the pages by cifs_copy_cache_pages() and
 * the loop counter advances by the pages covered.  The response buffer
 * is released according to buf_type after each request and once more
 * before leaving.
 *
 * @file:      open file; a NULL file->private_data is checked for
 * @mapping:   address space the pages belong to
 * @page_list: pages to fill, linked through page->lru
 * @num_pages: number of pages on the list
 */
1947 static int cifs_readpages(struct file *file, struct address_space *mapping,
1948 struct list_head *page_list, unsigned num_pages)
1954 struct cifs_sb_info *cifs_sb;
1955 struct cifsTconInfo *pTcon;
1956 unsigned int bytes_read = 0;
1957 unsigned int read_size, i;
1958 char *smb_read_data = NULL;
1959 struct smb_com_read_rsp *pSMBr;
1960 struct cifsFileInfo *open_file;
1961 int buf_type = CIFS_NO_BUFFER;
1964 if (file->private_data == NULL) {
1969 open_file = file->private_data;
1970 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1971 pTcon = open_file->tcon;
1974 * Reads as many pages as possible from fscache. Returns -ENOBUFS
1975 * immediately if the cookie is negative
1977 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
1982 cFYI(DBG2, "rpages: num pages %d", num_pages);
1983 for (i = 0; i < num_pages; ) {
1984 unsigned contig_pages;
1985 struct page *tmp_page;
1986 unsigned long expected_index;
1988 if (list_empty(page_list))
	/* the run starts at the page found at the tail of the list */
1991 page = list_entry(page_list->prev, struct page, lru);
1992 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1994 /* count adjacent pages that we will read into */
1997 list_entry(page_list->prev, struct page, lru)->index;
	/* walk backwards from the tail while the indexes stay
	   consecutive */
1998 list_for_each_entry_reverse(tmp_page, page_list, lru) {
1999 if (tmp_page->index == expected_index) {
	/* never count more pages than are still outstanding */
2005 if (contig_pages + i > num_pages)
2006 contig_pages = num_pages - i;
2008 /* for reads over a certain size could initiate async
2011 read_size = contig_pages * PAGE_CACHE_SIZE;
2012 /* Read size needs to be in multiples of one page */
2013 read_size = min_t(const unsigned int, read_size,
2014 cifs_sb->rsize & PAGE_CACHE_MASK);
2015 cFYI(DBG2, "rpages: read size 0x%x contiguous pages %d",
2016 read_size, contig_pages);
2018 while (rc == -EAGAIN) {
2019 if ((open_file->invalidHandle) &&
2020 (!open_file->closePend)) {
2021 rc = cifs_reopen_file(file, true);
2026 rc = CIFSSMBRead(xid, pTcon,
2029 &bytes_read, &smb_read_data,
2031 /* BB more RC checks ? */
	/* a response buffer from an attempt that is going to be retried
	   is released first */
2032 if (rc == -EAGAIN) {
2033 if (smb_read_data) {
2034 if (buf_type == CIFS_SMALL_BUFFER)
2035 cifs_small_buf_release(smb_read_data);
2036 else if (buf_type == CIFS_LARGE_BUFFER)
2037 cifs_buf_release(smb_read_data);
2038 smb_read_data = NULL;
2042 if ((rc < 0) || (smb_read_data == NULL)) {
2043 cFYI(1, "Read error in readpages: %d", rc);
2045 } else if (bytes_read > 0) {
2046 task_io_account_read(bytes_read);
2047 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
2048 cifs_copy_cache_pages(mapping, page_list, bytes_read,
2049 smb_read_data + 4 /* RFC1001 hdr */ +
2050 le16_to_cpu(pSMBr->DataOffset));
	/* advance by the number of whole pages that were covered */
2052 i += bytes_read >> PAGE_CACHE_SHIFT;
2053 cifs_stats_bytes_read(pTcon, bytes_read);
2054 if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
2055 i++; /* account for partial page */
2057 /* server copy of file can have smaller size
2059 /* BB do we need to verify this common case ?
2060 this case is ok - if we are at server EOF
2061 we will hit it on next read */
2066 cFYI(1, "No bytes read (%d) at offset %lld . "
2067 "Cleaning remaining pages from readahead list",
2068 bytes_read, offset);
2069 /* BB turn off caching and do new lookup on
2070 file size at server? */
2073 if (smb_read_data) {
2074 if (buf_type == CIFS_SMALL_BUFFER)
2075 cifs_small_buf_release(smb_read_data);
2076 else if (buf_type == CIFS_LARGE_BUFFER)
2077 cifs_buf_release(smb_read_data);
2078 smb_read_data = NULL;
2083 /* need to free smb_read_data buf before exit */
2084 if (smb_read_data) {
2085 if (buf_type == CIFS_SMALL_BUFFER)
2086 cifs_small_buf_release(smb_read_data);
2087 else if (buf_type == CIFS_LARGE_BUFFER)
2088 cifs_buf_release(smb_read_data);
2089 smb_read_data = NULL;
/*
 * cifs_readpage_worker - fill one page cache page with file data.
 *
 * The page is looked up in fscache first through
 * cifs_readpage_from_fscache().  On the path that reads from the server,
 * an extra page reference is taken, the page is kmap()ed and
 * PAGE_CACHE_SIZE bytes are requested with cifs_read() at *poffset.
 * The inode access time is updated, a short read has the rest of the
 * page zeroed, and the page is flushed, marked up to date and handed to
 * cifs_readpage_to_fscache() before the reference is dropped.
 *
 * @file: open file to read from
 * @page: page to fill
 */
2097 static int cifs_readpage_worker(struct file *file, struct page *page,
2103 /* Is the page cached? */
2104 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
2108 page_cache_get(page);
2109 read_data = kmap(page);
2110 /* for reads over a certain size could initiate async read ahead */
2112 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2117 cFYI(1, "Bytes read %d", rc);
2119 file->f_path.dentry->d_inode->i_atime =
2120 current_fs_time(file->f_path.dentry->d_inode->i_sb);
	/* rc holds the byte count here; clear whatever the read did not
	   fill */
2122 if (PAGE_CACHE_SIZE > rc)
2123 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2125 flush_dcache_page(page);
2126 SetPageUptodate(page);
2128 /* send this page to the cache */
2129 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
2135 page_cache_release(page);
/*
 * cifs_readpage - read a single page for the page cache.
 *
 * Computes the file offset of the page from page->index, checks that
 * file->private_data is set and lets cifs_readpage_worker() do the
 * read.
 *
 * @file: open file to read from
 * @page: page to fill
 */
2141 static int cifs_readpage(struct file *file, struct page *page)
2143 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2149 if (file->private_data == NULL) {
	/* NOTE(review): offset is cast to int for this message only, so
	   the logged value is truncated for large offsets */
2155 cFYI(1, "readpage %p at offset %d 0x%x\n",
2156 page, (int)offset, (int)offset);
2158 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - check whether the inode has a handle open for
 * writing.
 *
 * Walks cifs_inode->openFileList under GlobalSMBSeslock looking for an
 * entry that passes the closePend check and whose file was opened with
 * O_RDWR or O_WRONLY.  The lock is released as soon as such an entry is
 * found.  is_size_safe_to_change() uses the result as a truth value.
 *
 * @cifs_inode: inode whose open file list is examined
 */
2166 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2168 struct cifsFileInfo *open_file;
2170 read_lock(&GlobalSMBSeslock);
2171 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2172 if (open_file->closePend)
2174 if (open_file->pfile &&
2175 ((open_file->pfile->f_flags & O_RDWR) ||
2176 (open_file->pfile->f_flags & O_WRONLY))) {
2177 read_unlock(&GlobalSMBSeslock);
2181 read_unlock(&GlobalSMBSeslock);
2185 /* We do not want to update the file size from server for inodes
2186 open for write - to avoid races with writepage extending
2187 the file - in the future we could consider allowing
2188 refreshing the inode only on increases in the file size
2189 but this is tricky to do without racing with writebehind
2190 page caching in the current Linux kernel design */
/*
 * is_size_safe_to_change - decide whether the cached file size may be
 * replaced by a size reported by the server.
 *
 * Only an inode that is_inode_writable() reports as open for write
 * needs the extra checks: the CIFS_MOUNT_DIRECT_IO mount flag, and a
 * comparison of the current i_size with end_of_file.
 *
 * @cifsInode:   inode whose size is about to be updated
 * @end_of_file: file size the caller wants to apply
 */
2191 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2196 if (is_inode_writable(cifsInode)) {
2197 /* This inode is open for write at least once */
2198 struct cifs_sb_info *cifs_sb;
2200 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2201 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2202 /* since no page cache to corrupt on directio
2203 we can change size safely */
	/* the remaining check looks at whether the new size is larger
	   than the cached one */
2207 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - prepare a page cache page for a buffered write.
 *
 * The page is obtained with grab_cache_page_write_begin().  Reading the
 * old contents from the server is avoided when the page is already up
 * to date, when a whole page is going to be written, and - provided
 * clientCanCacheRead is set - when the page starts at or beyond i_size
 * or the write starts at offset 0 and reaches i_size; in that last
 * situation part of the page is zeroed with zero_user_segments() and
 * the page is flagged PageChecked.  Otherwise, unless the file was
 * opened write only, the page is read with cifs_readpage_worker() and
 * the result of that read is ignored.
 *
 * @file:    file being written
 * @mapping: address space of the file
 * @pos:     file position of the write
 * @len:     number of bytes about to be written
 * @flags:   passed on to grab_cache_page_write_begin()
 * @pagep:   presumably receives the prepared page - TODO confirm
 * @fsdata:  not used in the code shown here
 */
2215 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2216 loff_t pos, unsigned len, unsigned flags,
2217 struct page **pagep, void **fsdata)
2219 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2220 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2221 loff_t page_start = pos & PAGE_MASK;
2226 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
2228 page = grab_cache_page_write_begin(mapping, index, flags);
2234 if (PageUptodate(page))
2238 * If we write a full page it will be up to date, no need to read from
2239 * the server. If the write is short, we'll end up doing a sync write
2242 if (len == PAGE_CACHE_SIZE)
2246 * optimize away the read when we have an oplock, and we're not
2247 * expecting to use any of the data we'd be reading in. That
2248 * is, when the page lies beyond the EOF, or straddles the EOF
2249 * and the write will cover all of the existing data.
2251 if (CIFS_I(mapping->host)->clientCanCacheRead) {
2252 i_size = i_size_read(mapping->host);
2253 if (page_start >= i_size ||
2254 (offset == 0 && (pos + len) >= i_size)) {
2255 zero_user_segments(page, 0, offset,
2259 * PageChecked means that the parts of the page
2260 * to which we're not writing are considered up
2261 * to date. Once the data is copied to the
2262 * page, it can be set uptodate.
2264 SetPageChecked(page);
2269 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2271 * might as well read a page, it is fast enough. If we get
2272 * an error, we don't need to return it. cifs_write_end will
2273 * do a sync write instead since PG_uptodate isn't set.
2275 cifs_readpage_worker(file, page, &page_start);
2277 /* we could try using another file handle if there is one -
2278 but how would we lock it to prevent close of that handle
2279 racing with this read? In any case
2280 this will be written out by write_end so is fine */
/*
 * cifs_release_page - releasepage handler for cifs pages.
 *
 * PagePrivate() is consulted first; otherwise the decision is left to
 * cifs_fscache_release_page(), whose result is returned.
 *
 * @page: page the VM would like to release
 * @gfp:  allocation context, passed on to the fscache helper
 */
2287 static int cifs_release_page(struct page *page, gfp_t gfp)
2289 if (PagePrivate(page))
2292 return cifs_fscache_release_page(page, gfp);
/*
 * cifs_invalidate_page - invalidatepage handler for cifs pages.
 *
 * Forwards the page, together with the inode it belongs to, to
 * cifs_fscache_invalidate_page().
 *
 * @page:   page being invalidated
 * @offset: offset within the page; its use is not visible in the code
 *          shown here
 */
2295 static void cifs_invalidate_page(struct page *page, unsigned long offset)
2297 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
2300 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * cifs_oplock_break - work item that handles an oplock break for an
 * open file.
 *
 * The cifsFileInfo is recovered from the work struct with
 * container_of().  For a regular file, break_lease() is called with
 * O_RDONLY or O_WRONLY depending on clientCanCacheRead, and writeback
 * of dirty pages is started with filemap_fdatawrite().  When read
 * caching is no longer allowed the writeback is also waited for and
 * invalidate_remote_inode() is called.  The result of the flush is
 * stored in write_behind_rc.  Afterwards, unless the file is closing or
 * the break was cancelled, the oplock is released on the server with
 * CIFSSMBLock() using LOCKING_ANDX_OPLOCK_RELEASE.  GlobalSMBSeslock is
 * taken and dropped before cifs_oplock_break_put() ends the work.
 *
 * @work: work struct embedded in the cifsFileInfo
 */
2303 void cifs_oplock_break(struct work_struct *work)
2305 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2307 struct inode *inode = cfile->pInode;
	/* NOTE(review): CIFS_I() is applied to inode here, before the
	   inode test below - confirm this is harmless for a NULL inode */
2308 struct cifsInodeInfo *cinode = CIFS_I(inode);
2311 if (inode && S_ISREG(inode->i_mode)) {
	/* the lease break mode follows clientCanCacheRead */
2312 if (cinode->clientCanCacheRead)
2313 break_lease(inode, O_RDONLY);
2315 break_lease(inode, O_WRONLY);
2316 rc = filemap_fdatawrite(inode->i_mapping);
2317 if (cinode->clientCanCacheRead == 0) {
2318 waitrc = filemap_fdatawait(inode->i_mapping);
2319 invalidate_remote_inode(inode);
2324 cinode->write_behind_rc = rc;
2325 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2329 * releasing stale oplock after recent reconnect of smb session using
2330 * a now incorrect file handle is not a data integrity issue but do
2331 * not bother sending an oplock release if session to server still is
2332 * disconnected since oplock already released by the server
2334 if (!cfile->closePend && !cfile->oplock_break_cancelled) {
2335 rc = CIFSSMBLock(0, cfile->tcon, cfile->netfid, 0, 0, 0, 0,
2336 LOCKING_ANDX_OPLOCK_RELEASE, false);
2337 cFYI(1, "Oplock release rc = %d", rc);
2341 * We might have kicked in before is_valid_oplock_break()
2342 * finished grabbing reference for us. Make sure it's done by
2343 * waiting for GlobalSMSSeslock.
2345 write_lock(&GlobalSMBSeslock);
2346 write_unlock(&GlobalSMBSeslock);
2348 cifs_oplock_break_put(cfile);
/*
 * cifs_oplock_break_get - take the references needed while an oplock
 * break is being processed.
 *
 * A reference on the cifsFileInfo is taken with cifsFileInfo_get();
 * cifs_oplock_break_put() is the counterpart.
 *
 * @cfile: open file the oplock break refers to
 */
2351 void cifs_oplock_break_get(struct cifsFileInfo *cfile)
2354 cifsFileInfo_get(cfile);
/*
 * cifs_oplock_break_put - drop the references held for oplock break
 * processing.
 *
 * The cifsFileInfo reference is released with cifsFileInfo_put();
 * cifs_oplock_break() calls this as its last step.
 *
 * @cfile: open file the oplock break referred to
 */
2357 void cifs_oplock_break_put(struct cifsFileInfo *cfile)
2360 cifsFileInfo_put(cfile);
/*
 * Address space operations for cifs files.  This table includes the
 * multi-page read handler (.readpages); dirtying of pages uses the
 * generic __set_page_dirty_nobuffers helper.
 */
2363 const struct address_space_operations cifs_addr_ops = {
2364 .readpage = cifs_readpage,
2365 .readpages = cifs_readpages,
2366 .writepage = cifs_writepage,
2367 .writepages = cifs_writepages,
2368 .write_begin = cifs_write_begin,
2369 .write_end = cifs_write_end,
2370 .set_page_dirty = __set_page_dirty_nobuffers,
2371 .releasepage = cifs_release_page,
2372 .invalidatepage = cifs_invalidate_page,
2373 /* .sync_page = cifs_sync_page, */
2378 * cifs_readpages requires the server to support a buffer large enough to
2379 * contain the header plus one complete page of data. Otherwise, we need
2380 * to leave cifs_readpages out of the address space operations.
2382 const struct address_space_operations cifs_addr_ops_smallbuf = {
2383 .readpage = cifs_readpage,
2384 .writepage = cifs_writepage,
2385 .writepages = cifs_writepages,
2386 .write_begin = cifs_write_begin,
2387 .write_end = cifs_write_end,
2388 .set_page_dirty = __set_page_dirty_nobuffers,
2389 .releasepage = cifs_release_page,
2390 .invalidatepage = cifs_invalidate_page,
2391 /* .sync_page = cifs_sync_page, */