]> Git Repo - linux.git/blob - fs/cifs/file.c
MIPS: Simplify FP context initialization
[linux.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French ([email protected])
8  *              Jeremy Allison ([email protected])
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45 #include "smbdirect.h"
46
47 static inline int cifs_convert_flags(unsigned int flags)
48 {
49         if ((flags & O_ACCMODE) == O_RDONLY)
50                 return GENERIC_READ;
51         else if ((flags & O_ACCMODE) == O_WRONLY)
52                 return GENERIC_WRITE;
53         else if ((flags & O_ACCMODE) == O_RDWR) {
54                 /* GENERIC_ALL is too much permission to request
55                    can cause unnecessary access denied on create */
56                 /* return GENERIC_ALL; */
57                 return (GENERIC_READ | GENERIC_WRITE);
58         }
59
60         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
62                 FILE_READ_DATA);
63 }
64
65 static u32 cifs_posix_convert_flags(unsigned int flags)
66 {
67         u32 posix_flags = 0;
68
69         if ((flags & O_ACCMODE) == O_RDONLY)
70                 posix_flags = SMB_O_RDONLY;
71         else if ((flags & O_ACCMODE) == O_WRONLY)
72                 posix_flags = SMB_O_WRONLY;
73         else if ((flags & O_ACCMODE) == O_RDWR)
74                 posix_flags = SMB_O_RDWR;
75
76         if (flags & O_CREAT) {
77                 posix_flags |= SMB_O_CREAT;
78                 if (flags & O_EXCL)
79                         posix_flags |= SMB_O_EXCL;
80         } else if (flags & O_EXCL)
81                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82                          current->comm, current->tgid);
83
84         if (flags & O_TRUNC)
85                 posix_flags |= SMB_O_TRUNC;
86         /* be safe and imply O_SYNC for O_DSYNC */
87         if (flags & O_DSYNC)
88                 posix_flags |= SMB_O_SYNC;
89         if (flags & O_DIRECTORY)
90                 posix_flags |= SMB_O_DIRECTORY;
91         if (flags & O_NOFOLLOW)
92                 posix_flags |= SMB_O_NOFOLLOW;
93         if (flags & O_DIRECT)
94                 posix_flags |= SMB_O_DIRECT;
95
96         return posix_flags;
97 }
98
99 static inline int cifs_get_disposition(unsigned int flags)
100 {
101         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
102                 return FILE_CREATE;
103         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104                 return FILE_OVERWRITE_IF;
105         else if ((flags & O_CREAT) == O_CREAT)
106                 return FILE_OPEN_IF;
107         else if ((flags & O_TRUNC) == O_TRUNC)
108                 return FILE_OVERWRITE;
109         else
110                 return FILE_OPEN;
111 }
112
113 int cifs_posix_open(char *full_path, struct inode **pinode,
114                         struct super_block *sb, int mode, unsigned int f_flags,
115                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
116 {
117         int rc;
118         FILE_UNIX_BASIC_INFO *presp_data;
119         __u32 posix_flags = 0;
120         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
121         struct cifs_fattr fattr;
122         struct tcon_link *tlink;
123         struct cifs_tcon *tcon;
124
125         cifs_dbg(FYI, "posix open %s\n", full_path);
126
127         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
128         if (presp_data == NULL)
129                 return -ENOMEM;
130
131         tlink = cifs_sb_tlink(cifs_sb);
132         if (IS_ERR(tlink)) {
133                 rc = PTR_ERR(tlink);
134                 goto posix_open_ret;
135         }
136
137         tcon = tlink_tcon(tlink);
138         mode &= ~current_umask();
139
140         posix_flags = cifs_posix_convert_flags(f_flags);
141         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
142                              poplock, full_path, cifs_sb->local_nls,
143                              cifs_remap(cifs_sb));
144         cifs_put_tlink(tlink);
145
146         if (rc)
147                 goto posix_open_ret;
148
149         if (presp_data->Type == cpu_to_le32(-1))
150                 goto posix_open_ret; /* open ok, caller does qpathinfo */
151
152         if (!pinode)
153                 goto posix_open_ret; /* caller does not need info */
154
155         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
156
157         /* get new inode and set it up */
158         if (*pinode == NULL) {
159                 cifs_fill_uniqueid(sb, &fattr);
160                 *pinode = cifs_iget(sb, &fattr);
161                 if (!*pinode) {
162                         rc = -ENOMEM;
163                         goto posix_open_ret;
164                 }
165         } else {
166                 cifs_fattr_to_inode(*pinode, &fattr);
167         }
168
169 posix_open_ret:
170         kfree(presp_data);
171         return rc;
172 }
173
174 static int
175 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
176              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
177              struct cifs_fid *fid, unsigned int xid)
178 {
179         int rc;
180         int desired_access;
181         int disposition;
182         int create_options = CREATE_NOT_DIR;
183         FILE_ALL_INFO *buf;
184         struct TCP_Server_Info *server = tcon->ses->server;
185         struct cifs_open_parms oparms;
186
187         if (!server->ops->open)
188                 return -ENOSYS;
189
190         desired_access = cifs_convert_flags(f_flags);
191
192 /*********************************************************************
193  *  open flag mapping table:
194  *
195  *      POSIX Flag            CIFS Disposition
196  *      ----------            ----------------
197  *      O_CREAT               FILE_OPEN_IF
198  *      O_CREAT | O_EXCL      FILE_CREATE
199  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
200  *      O_TRUNC               FILE_OVERWRITE
201  *      none of the above     FILE_OPEN
202  *
203  *      Note that there is not a direct match between disposition
204  *      FILE_SUPERSEDE (ie create whether or not file exists although
205  *      O_CREAT | O_TRUNC is similar but truncates the existing
206  *      file rather than creating a new file as FILE_SUPERSEDE does
207  *      (which uses the attributes / metadata passed in on open call)
208  *?
209  *?  O_SYNC is a reasonable match to CIFS writethrough flag
210  *?  and the read write flags match reasonably.  O_LARGEFILE
211  *?  is irrelevant because largefile support is always used
212  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
213  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
214  *********************************************************************/
215
216         disposition = cifs_get_disposition(f_flags);
217
218         /* BB pass O_SYNC flag through on file attributes .. BB */
219
220         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
221         if (!buf)
222                 return -ENOMEM;
223
224         if (backup_cred(cifs_sb))
225                 create_options |= CREATE_OPEN_BACKUP_INTENT;
226
227         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
228         if (f_flags & O_SYNC)
229                 create_options |= CREATE_WRITE_THROUGH;
230
231         if (f_flags & O_DIRECT)
232                 create_options |= CREATE_NO_BUFFER;
233
234         oparms.tcon = tcon;
235         oparms.cifs_sb = cifs_sb;
236         oparms.desired_access = desired_access;
237         oparms.create_options = create_options;
238         oparms.disposition = disposition;
239         oparms.path = full_path;
240         oparms.fid = fid;
241         oparms.reconnect = false;
242
243         rc = server->ops->open(xid, &oparms, oplock, buf);
244
245         if (rc)
246                 goto out;
247
248         if (tcon->unix_ext)
249                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
250                                               xid);
251         else
252                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
253                                          xid, fid);
254
255 out:
256         kfree(buf);
257         return rc;
258 }
259
260 static bool
261 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
262 {
263         struct cifs_fid_locks *cur;
264         bool has_locks = false;
265
266         down_read(&cinode->lock_sem);
267         list_for_each_entry(cur, &cinode->llist, llist) {
268                 if (!list_empty(&cur->locks)) {
269                         has_locks = true;
270                         break;
271                 }
272         }
273         up_read(&cinode->lock_sem);
274         return has_locks;
275 }
276
277 struct cifsFileInfo *
278 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
279                   struct tcon_link *tlink, __u32 oplock)
280 {
281         struct dentry *dentry = file_dentry(file);
282         struct inode *inode = d_inode(dentry);
283         struct cifsInodeInfo *cinode = CIFS_I(inode);
284         struct cifsFileInfo *cfile;
285         struct cifs_fid_locks *fdlocks;
286         struct cifs_tcon *tcon = tlink_tcon(tlink);
287         struct TCP_Server_Info *server = tcon->ses->server;
288
289         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
290         if (cfile == NULL)
291                 return cfile;
292
293         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
294         if (!fdlocks) {
295                 kfree(cfile);
296                 return NULL;
297         }
298
299         INIT_LIST_HEAD(&fdlocks->locks);
300         fdlocks->cfile = cfile;
301         cfile->llist = fdlocks;
302         down_write(&cinode->lock_sem);
303         list_add(&fdlocks->llist, &cinode->llist);
304         up_write(&cinode->lock_sem);
305
306         cfile->count = 1;
307         cfile->pid = current->tgid;
308         cfile->uid = current_fsuid();
309         cfile->dentry = dget(dentry);
310         cfile->f_flags = file->f_flags;
311         cfile->invalidHandle = false;
312         cfile->tlink = cifs_get_tlink(tlink);
313         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
314         mutex_init(&cfile->fh_mutex);
315         spin_lock_init(&cfile->file_info_lock);
316
317         cifs_sb_active(inode->i_sb);
318
319         /*
320          * If the server returned a read oplock and we have mandatory brlocks,
321          * set oplock level to None.
322          */
323         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
324                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
325                 oplock = 0;
326         }
327
328         spin_lock(&tcon->open_file_lock);
329         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
330                 oplock = fid->pending_open->oplock;
331         list_del(&fid->pending_open->olist);
332
333         fid->purge_cache = false;
334         server->ops->set_fid(cfile, fid, oplock);
335
336         list_add(&cfile->tlist, &tcon->openFileList);
337         atomic_inc(&tcon->num_local_opens);
338
339         /* if readable file instance put first in list*/
340         if (file->f_mode & FMODE_READ)
341                 list_add(&cfile->flist, &cinode->openFileList);
342         else
343                 list_add_tail(&cfile->flist, &cinode->openFileList);
344         spin_unlock(&tcon->open_file_lock);
345
346         if (fid->purge_cache)
347                 cifs_zap_mapping(inode);
348
349         file->private_data = cfile;
350         return cfile;
351 }
352
353 struct cifsFileInfo *
354 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
355 {
356         spin_lock(&cifs_file->file_info_lock);
357         cifsFileInfo_get_locked(cifs_file);
358         spin_unlock(&cifs_file->file_info_lock);
359         return cifs_file;
360 }
361
362 /*
363  * Release a reference on the file private data. This may involve closing
364  * the filehandle out on the server. Must be called without holding
365  * tcon->open_file_lock and cifs_file->file_info_lock.
366  */
367 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
368 {
369         struct inode *inode = d_inode(cifs_file->dentry);
370         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
371         struct TCP_Server_Info *server = tcon->ses->server;
372         struct cifsInodeInfo *cifsi = CIFS_I(inode);
373         struct super_block *sb = inode->i_sb;
374         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
375         struct cifsLockInfo *li, *tmp;
376         struct cifs_fid fid;
377         struct cifs_pending_open open;
378         bool oplock_break_cancelled;
379
380         spin_lock(&tcon->open_file_lock);
381
382         spin_lock(&cifs_file->file_info_lock);
383         if (--cifs_file->count > 0) {
384                 spin_unlock(&cifs_file->file_info_lock);
385                 spin_unlock(&tcon->open_file_lock);
386                 return;
387         }
388         spin_unlock(&cifs_file->file_info_lock);
389
390         if (server->ops->get_lease_key)
391                 server->ops->get_lease_key(inode, &fid);
392
393         /* store open in pending opens to make sure we don't miss lease break */
394         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
395
396         /* remove it from the lists */
397         list_del(&cifs_file->flist);
398         list_del(&cifs_file->tlist);
399         atomic_dec(&tcon->num_local_opens);
400
401         if (list_empty(&cifsi->openFileList)) {
402                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
403                          d_inode(cifs_file->dentry));
404                 /*
405                  * In strict cache mode we need invalidate mapping on the last
406                  * close  because it may cause a error when we open this file
407                  * again and get at least level II oplock.
408                  */
409                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
410                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
411                 cifs_set_oplock_level(cifsi, 0);
412         }
413
414         spin_unlock(&tcon->open_file_lock);
415
416         oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
417
418         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
419                 struct TCP_Server_Info *server = tcon->ses->server;
420                 unsigned int xid;
421
422                 xid = get_xid();
423                 if (server->ops->close)
424                         server->ops->close(xid, tcon, &cifs_file->fid);
425                 _free_xid(xid);
426         }
427
428         if (oplock_break_cancelled)
429                 cifs_done_oplock_break(cifsi);
430
431         cifs_del_pending_open(&open);
432
433         /*
434          * Delete any outstanding lock records. We'll lose them when the file
435          * is closed anyway.
436          */
437         down_write(&cifsi->lock_sem);
438         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
439                 list_del(&li->llist);
440                 cifs_del_lock_waiters(li);
441                 kfree(li);
442         }
443         list_del(&cifs_file->llist->llist);
444         kfree(cifs_file->llist);
445         up_write(&cifsi->lock_sem);
446
447         cifs_put_tlink(cifs_file->tlink);
448         dput(cifs_file->dentry);
449         cifs_sb_deactive(sb);
450         kfree(cifs_file);
451 }
452
453 int cifs_open(struct inode *inode, struct file *file)
454
455 {
456         int rc = -EACCES;
457         unsigned int xid;
458         __u32 oplock;
459         struct cifs_sb_info *cifs_sb;
460         struct TCP_Server_Info *server;
461         struct cifs_tcon *tcon;
462         struct tcon_link *tlink;
463         struct cifsFileInfo *cfile = NULL;
464         char *full_path = NULL;
465         bool posix_open_ok = false;
466         struct cifs_fid fid;
467         struct cifs_pending_open open;
468
469         xid = get_xid();
470
471         cifs_sb = CIFS_SB(inode->i_sb);
472         tlink = cifs_sb_tlink(cifs_sb);
473         if (IS_ERR(tlink)) {
474                 free_xid(xid);
475                 return PTR_ERR(tlink);
476         }
477         tcon = tlink_tcon(tlink);
478         server = tcon->ses->server;
479
480         full_path = build_path_from_dentry(file_dentry(file));
481         if (full_path == NULL) {
482                 rc = -ENOMEM;
483                 goto out;
484         }
485
486         cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
487                  inode, file->f_flags, full_path);
488
489         if (file->f_flags & O_DIRECT &&
490             cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
491                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
492                         file->f_op = &cifs_file_direct_nobrl_ops;
493                 else
494                         file->f_op = &cifs_file_direct_ops;
495         }
496
497         if (server->oplocks)
498                 oplock = REQ_OPLOCK;
499         else
500                 oplock = 0;
501
502         if (!tcon->broken_posix_open && tcon->unix_ext &&
503             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
504                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
505                 /* can not refresh inode info since size could be stale */
506                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
507                                 cifs_sb->mnt_file_mode /* ignored */,
508                                 file->f_flags, &oplock, &fid.netfid, xid);
509                 if (rc == 0) {
510                         cifs_dbg(FYI, "posix open succeeded\n");
511                         posix_open_ok = true;
512                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
513                         if (tcon->ses->serverNOS)
514                                 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
515                                          tcon->ses->serverName,
516                                          tcon->ses->serverNOS);
517                         tcon->broken_posix_open = true;
518                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
519                          (rc != -EOPNOTSUPP)) /* path not found or net err */
520                         goto out;
521                 /*
522                  * Else fallthrough to retry open the old way on network i/o
523                  * or DFS errors.
524                  */
525         }
526
527         if (server->ops->get_lease_key)
528                 server->ops->get_lease_key(inode, &fid);
529
530         cifs_add_pending_open(&fid, tlink, &open);
531
532         if (!posix_open_ok) {
533                 if (server->ops->get_lease_key)
534                         server->ops->get_lease_key(inode, &fid);
535
536                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
537                                   file->f_flags, &oplock, &fid, xid);
538                 if (rc) {
539                         cifs_del_pending_open(&open);
540                         goto out;
541                 }
542         }
543
544         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
545         if (cfile == NULL) {
546                 if (server->ops->close)
547                         server->ops->close(xid, tcon, &fid);
548                 cifs_del_pending_open(&open);
549                 rc = -ENOMEM;
550                 goto out;
551         }
552
553         cifs_fscache_set_inode_cookie(inode, file);
554
555         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
556                 /*
557                  * Time to set mode which we can not set earlier due to
558                  * problems creating new read-only files.
559                  */
560                 struct cifs_unix_set_info_args args = {
561                         .mode   = inode->i_mode,
562                         .uid    = INVALID_UID, /* no change */
563                         .gid    = INVALID_GID, /* no change */
564                         .ctime  = NO_CHANGE_64,
565                         .atime  = NO_CHANGE_64,
566                         .mtime  = NO_CHANGE_64,
567                         .device = 0,
568                 };
569                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
570                                        cfile->pid);
571         }
572
573 out:
574         kfree(full_path);
575         free_xid(xid);
576         cifs_put_tlink(tlink);
577         return rc;
578 }
579
580 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
581
582 /*
583  * Try to reacquire byte range locks that were released when session
584  * to server was lost.
585  */
586 static int
587 cifs_relock_file(struct cifsFileInfo *cfile)
588 {
589         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
590         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
591         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
592         int rc = 0;
593
594         down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
595         if (cinode->can_cache_brlcks) {
596                 /* can cache locks - no need to relock */
597                 up_read(&cinode->lock_sem);
598                 return rc;
599         }
600
601         if (cap_unix(tcon->ses) &&
602             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
603             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
604                 rc = cifs_push_posix_locks(cfile);
605         else
606                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
607
608         up_read(&cinode->lock_sem);
609         return rc;
610 }
611
612 static int
613 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
614 {
615         int rc = -EACCES;
616         unsigned int xid;
617         __u32 oplock;
618         struct cifs_sb_info *cifs_sb;
619         struct cifs_tcon *tcon;
620         struct TCP_Server_Info *server;
621         struct cifsInodeInfo *cinode;
622         struct inode *inode;
623         char *full_path = NULL;
624         int desired_access;
625         int disposition = FILE_OPEN;
626         int create_options = CREATE_NOT_DIR;
627         struct cifs_open_parms oparms;
628
629         xid = get_xid();
630         mutex_lock(&cfile->fh_mutex);
631         if (!cfile->invalidHandle) {
632                 mutex_unlock(&cfile->fh_mutex);
633                 rc = 0;
634                 free_xid(xid);
635                 return rc;
636         }
637
638         inode = d_inode(cfile->dentry);
639         cifs_sb = CIFS_SB(inode->i_sb);
640         tcon = tlink_tcon(cfile->tlink);
641         server = tcon->ses->server;
642
643         /*
644          * Can not grab rename sem here because various ops, including those
645          * that already have the rename sem can end up causing writepage to get
646          * called and if the server was down that means we end up here, and we
647          * can never tell if the caller already has the rename_sem.
648          */
649         full_path = build_path_from_dentry(cfile->dentry);
650         if (full_path == NULL) {
651                 rc = -ENOMEM;
652                 mutex_unlock(&cfile->fh_mutex);
653                 free_xid(xid);
654                 return rc;
655         }
656
657         cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
658                  inode, cfile->f_flags, full_path);
659
660         if (tcon->ses->server->oplocks)
661                 oplock = REQ_OPLOCK;
662         else
663                 oplock = 0;
664
665         if (tcon->unix_ext && cap_unix(tcon->ses) &&
666             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
667                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
668                 /*
669                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
670                  * original open. Must mask them off for a reopen.
671                  */
672                 unsigned int oflags = cfile->f_flags &
673                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
674
675                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
676                                      cifs_sb->mnt_file_mode /* ignored */,
677                                      oflags, &oplock, &cfile->fid.netfid, xid);
678                 if (rc == 0) {
679                         cifs_dbg(FYI, "posix reopen succeeded\n");
680                         oparms.reconnect = true;
681                         goto reopen_success;
682                 }
683                 /*
684                  * fallthrough to retry open the old way on errors, especially
685                  * in the reconnect path it is important to retry hard
686                  */
687         }
688
689         desired_access = cifs_convert_flags(cfile->f_flags);
690
691         if (backup_cred(cifs_sb))
692                 create_options |= CREATE_OPEN_BACKUP_INTENT;
693
694         if (server->ops->get_lease_key)
695                 server->ops->get_lease_key(inode, &cfile->fid);
696
697         oparms.tcon = tcon;
698         oparms.cifs_sb = cifs_sb;
699         oparms.desired_access = desired_access;
700         oparms.create_options = create_options;
701         oparms.disposition = disposition;
702         oparms.path = full_path;
703         oparms.fid = &cfile->fid;
704         oparms.reconnect = true;
705
706         /*
707          * Can not refresh inode by passing in file_info buf to be returned by
708          * ops->open and then calling get_inode_info with returned buf since
709          * file might have write behind data that needs to be flushed and server
710          * version of file size can be stale. If we knew for sure that inode was
711          * not dirty locally we could do this.
712          */
713         rc = server->ops->open(xid, &oparms, &oplock, NULL);
714         if (rc == -ENOENT && oparms.reconnect == false) {
715                 /* durable handle timeout is expired - open the file again */
716                 rc = server->ops->open(xid, &oparms, &oplock, NULL);
717                 /* indicate that we need to relock the file */
718                 oparms.reconnect = true;
719         }
720
721         if (rc) {
722                 mutex_unlock(&cfile->fh_mutex);
723                 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
724                 cifs_dbg(FYI, "oplock: %d\n", oplock);
725                 goto reopen_error_exit;
726         }
727
728 reopen_success:
729         cfile->invalidHandle = false;
730         mutex_unlock(&cfile->fh_mutex);
731         cinode = CIFS_I(inode);
732
733         if (can_flush) {
734                 rc = filemap_write_and_wait(inode->i_mapping);
735                 mapping_set_error(inode->i_mapping, rc);
736
737                 if (tcon->unix_ext)
738                         rc = cifs_get_inode_info_unix(&inode, full_path,
739                                                       inode->i_sb, xid);
740                 else
741                         rc = cifs_get_inode_info(&inode, full_path, NULL,
742                                                  inode->i_sb, xid, NULL);
743         }
744         /*
745          * Else we are writing out data to server already and could deadlock if
746          * we tried to flush data, and since we do not know if we have data that
747          * would invalidate the current end of file on the server we can not go
748          * to the server to get the new inode info.
749          */
750
751         /*
752          * If the server returned a read oplock and we have mandatory brlocks,
753          * set oplock level to None.
754          */
755         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
756                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
757                 oplock = 0;
758         }
759
760         server->ops->set_fid(cfile, &cfile->fid, oplock);
761         if (oparms.reconnect)
762                 cifs_relock_file(cfile);
763
764 reopen_error_exit:
765         kfree(full_path);
766         free_xid(xid);
767         return rc;
768 }
769
770 int cifs_close(struct inode *inode, struct file *file)
771 {
772         if (file->private_data != NULL) {
773                 cifsFileInfo_put(file->private_data);
774                 file->private_data = NULL;
775         }
776
777         /* return code from the ->release op is always ignored */
778         return 0;
779 }
780
781 void
782 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
783 {
784         struct cifsFileInfo *open_file;
785         struct list_head *tmp;
786         struct list_head *tmp1;
787         struct list_head tmp_list;
788
789         if (!tcon->use_persistent || !tcon->need_reopen_files)
790                 return;
791
792         tcon->need_reopen_files = false;
793
794         cifs_dbg(FYI, "Reopen persistent handles");
795         INIT_LIST_HEAD(&tmp_list);
796
797         /* list all files open on tree connection, reopen resilient handles  */
798         spin_lock(&tcon->open_file_lock);
799         list_for_each(tmp, &tcon->openFileList) {
800                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
801                 if (!open_file->invalidHandle)
802                         continue;
803                 cifsFileInfo_get(open_file);
804                 list_add_tail(&open_file->rlist, &tmp_list);
805         }
806         spin_unlock(&tcon->open_file_lock);
807
808         list_for_each_safe(tmp, tmp1, &tmp_list) {
809                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
810                 if (cifs_reopen_file(open_file, false /* do not flush */))
811                         tcon->need_reopen_files = true;
812                 list_del_init(&open_file->rlist);
813                 cifsFileInfo_put(open_file);
814         }
815 }
816
817 int cifs_closedir(struct inode *inode, struct file *file)
818 {
819         int rc = 0;
820         unsigned int xid;
821         struct cifsFileInfo *cfile = file->private_data;
822         struct cifs_tcon *tcon;
823         struct TCP_Server_Info *server;
824         char *buf;
825
826         cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
827
828         if (cfile == NULL)
829                 return rc;
830
831         xid = get_xid();
832         tcon = tlink_tcon(cfile->tlink);
833         server = tcon->ses->server;
834
835         cifs_dbg(FYI, "Freeing private data in close dir\n");
836         spin_lock(&cfile->file_info_lock);
837         if (server->ops->dir_needs_close(cfile)) {
838                 cfile->invalidHandle = true;
839                 spin_unlock(&cfile->file_info_lock);
840                 if (server->ops->close_dir)
841                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
842                 else
843                         rc = -ENOSYS;
844                 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
845                 /* not much we can do if it fails anyway, ignore rc */
846                 rc = 0;
847         } else
848                 spin_unlock(&cfile->file_info_lock);
849
850         buf = cfile->srch_inf.ntwrk_buf_start;
851         if (buf) {
852                 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
853                 cfile->srch_inf.ntwrk_buf_start = NULL;
854                 if (cfile->srch_inf.smallBuf)
855                         cifs_small_buf_release(buf);
856                 else
857                         cifs_buf_release(buf);
858         }
859
860         cifs_put_tlink(cfile->tlink);
861         kfree(file->private_data);
862         file->private_data = NULL;
863         /* BB can we lock the filestruct while this is going on? */
864         free_xid(xid);
865         return rc;
866 }
867
868 static struct cifsLockInfo *
869 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
870 {
871         struct cifsLockInfo *lock =
872                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
873         if (!lock)
874                 return lock;
875         lock->offset = offset;
876         lock->length = length;
877         lock->type = type;
878         lock->pid = current->tgid;
879         lock->flags = flags;
880         INIT_LIST_HEAD(&lock->blist);
881         init_waitqueue_head(&lock->block_q);
882         return lock;
883 }
884
885 void
886 cifs_del_lock_waiters(struct cifsLockInfo *lock)
887 {
888         struct cifsLockInfo *li, *tmp;
889         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
890                 list_del_init(&li->blist);
891                 wake_up(&li->block_q);
892         }
893 }
894
895 #define CIFS_LOCK_OP    0
896 #define CIFS_READ_OP    1
897 #define CIFS_WRITE_OP   2
898
899 /* @rw_check : 0 - no op, 1 - read, 2 - write */
900 static bool
901 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
902                             __u64 length, __u8 type, __u16 flags,
903                             struct cifsFileInfo *cfile,
904                             struct cifsLockInfo **conf_lock, int rw_check)
905 {
906         struct cifsLockInfo *li;
907         struct cifsFileInfo *cur_cfile = fdlocks->cfile;
908         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
909
910         list_for_each_entry(li, &fdlocks->locks, llist) {
911                 if (offset + length <= li->offset ||
912                     offset >= li->offset + li->length)
913                         continue;
914                 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
915                     server->ops->compare_fids(cfile, cur_cfile)) {
916                         /* shared lock prevents write op through the same fid */
917                         if (!(li->type & server->vals->shared_lock_type) ||
918                             rw_check != CIFS_WRITE_OP)
919                                 continue;
920                 }
921                 if ((type & server->vals->shared_lock_type) &&
922                     ((server->ops->compare_fids(cfile, cur_cfile) &&
923                      current->tgid == li->pid) || type == li->type))
924                         continue;
925                 if (rw_check == CIFS_LOCK_OP &&
926                     (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
927                     server->ops->compare_fids(cfile, cur_cfile))
928                         continue;
929                 if (conf_lock)
930                         *conf_lock = li;
931                 return true;
932         }
933         return false;
934 }
935
936 bool
937 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
938                         __u8 type, __u16 flags,
939                         struct cifsLockInfo **conf_lock, int rw_check)
940 {
941         bool rc = false;
942         struct cifs_fid_locks *cur;
943         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
944
945         list_for_each_entry(cur, &cinode->llist, llist) {
946                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
947                                                  flags, cfile, conf_lock,
948                                                  rw_check);
949                 if (rc)
950                         break;
951         }
952
953         return rc;
954 }
955
956 /*
957  * Check if there is another lock that prevents us to set the lock (mandatory
958  * style). If such a lock exists, update the flock structure with its
959  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
960  * or leave it the same if we can't. Returns 0 if we don't need to request to
961  * the server or 1 otherwise.
962  */
963 static int
964 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
965                __u8 type, struct file_lock *flock)
966 {
967         int rc = 0;
968         struct cifsLockInfo *conf_lock;
969         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
970         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
971         bool exist;
972
973         down_read(&cinode->lock_sem);
974
975         exist = cifs_find_lock_conflict(cfile, offset, length, type,
976                                         flock->fl_flags, &conf_lock,
977                                         CIFS_LOCK_OP);
978         if (exist) {
979                 flock->fl_start = conf_lock->offset;
980                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
981                 flock->fl_pid = conf_lock->pid;
982                 if (conf_lock->type & server->vals->shared_lock_type)
983                         flock->fl_type = F_RDLCK;
984                 else
985                         flock->fl_type = F_WRLCK;
986         } else if (!cinode->can_cache_brlcks)
987                 rc = 1;
988         else
989                 flock->fl_type = F_UNLCK;
990
991         up_read(&cinode->lock_sem);
992         return rc;
993 }
994
995 static void
996 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
997 {
998         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
999         down_write(&cinode->lock_sem);
1000         list_add_tail(&lock->llist, &cfile->llist->locks);
1001         up_write(&cinode->lock_sem);
1002 }
1003
1004 /*
1005  * Set the byte-range lock (mandatory style). Returns:
1006  * 1) 0, if we set the lock and don't need to request to the server;
1007  * 2) 1, if no locks prevent us but we need to request to the server;
1008  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1009  */
1010 static int
1011 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1012                  bool wait)
1013 {
1014         struct cifsLockInfo *conf_lock;
1015         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1016         bool exist;
1017         int rc = 0;
1018
1019 try_again:
1020         exist = false;
1021         down_write(&cinode->lock_sem);
1022
1023         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1024                                         lock->type, lock->flags, &conf_lock,
1025                                         CIFS_LOCK_OP);
1026         if (!exist && cinode->can_cache_brlcks) {
1027                 list_add_tail(&lock->llist, &cfile->llist->locks);
1028                 up_write(&cinode->lock_sem);
1029                 return rc;
1030         }
1031
1032         if (!exist)
1033                 rc = 1;
1034         else if (!wait)
1035                 rc = -EACCES;
1036         else {
1037                 list_add_tail(&lock->blist, &conf_lock->blist);
1038                 up_write(&cinode->lock_sem);
1039                 rc = wait_event_interruptible(lock->block_q,
1040                                         (lock->blist.prev == &lock->blist) &&
1041                                         (lock->blist.next == &lock->blist));
1042                 if (!rc)
1043                         goto try_again;
1044                 down_write(&cinode->lock_sem);
1045                 list_del_init(&lock->blist);
1046         }
1047
1048         up_write(&cinode->lock_sem);
1049         return rc;
1050 }
1051
1052 /*
1053  * Check if there is another lock that prevents us to set the lock (posix
1054  * style). If such a lock exists, update the flock structure with its
1055  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1056  * or leave it the same if we can't. Returns 0 if we don't need to request to
1057  * the server or 1 otherwise.
1058  */
1059 static int
1060 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1061 {
1062         int rc = 0;
1063         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1064         unsigned char saved_type = flock->fl_type;
1065
1066         if ((flock->fl_flags & FL_POSIX) == 0)
1067                 return 1;
1068
1069         down_read(&cinode->lock_sem);
1070         posix_test_lock(file, flock);
1071
1072         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1073                 flock->fl_type = saved_type;
1074                 rc = 1;
1075         }
1076
1077         up_read(&cinode->lock_sem);
1078         return rc;
1079 }
1080
1081 /*
1082  * Set the byte-range lock (posix style). Returns:
1083  * 1) 0, if we set the lock and don't need to request to the server;
1084  * 2) 1, if we need to request to the server;
1085  * 3) <0, if the error occurs while setting the lock.
1086  */
1087 static int
1088 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1089 {
1090         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1091         int rc = 1;
1092
1093         if ((flock->fl_flags & FL_POSIX) == 0)
1094                 return rc;
1095
1096 try_again:
1097         down_write(&cinode->lock_sem);
1098         if (!cinode->can_cache_brlcks) {
1099                 up_write(&cinode->lock_sem);
1100                 return rc;
1101         }
1102
1103         rc = posix_lock_file(file, flock, NULL);
1104         up_write(&cinode->lock_sem);
1105         if (rc == FILE_LOCK_DEFERRED) {
1106                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1107                 if (!rc)
1108                         goto try_again;
1109                 posix_unblock_lock(flock);
1110         }
1111         return rc;
1112 }
1113
1114 int
1115 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1116 {
1117         unsigned int xid;
1118         int rc = 0, stored_rc;
1119         struct cifsLockInfo *li, *tmp;
1120         struct cifs_tcon *tcon;
1121         unsigned int num, max_num, max_buf;
1122         LOCKING_ANDX_RANGE *buf, *cur;
1123         static const int types[] = {
1124                 LOCKING_ANDX_LARGE_FILES,
1125                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1126         };
1127         int i;
1128
1129         xid = get_xid();
1130         tcon = tlink_tcon(cfile->tlink);
1131
1132         /*
1133          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1134          * and check it for zero before using.
1135          */
1136         max_buf = tcon->ses->server->maxBuf;
1137         if (!max_buf) {
1138                 free_xid(xid);
1139                 return -EINVAL;
1140         }
1141
1142         max_num = (max_buf - sizeof(struct smb_hdr)) /
1143                                                 sizeof(LOCKING_ANDX_RANGE);
1144         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1145         if (!buf) {
1146                 free_xid(xid);
1147                 return -ENOMEM;
1148         }
1149
1150         for (i = 0; i < 2; i++) {
1151                 cur = buf;
1152                 num = 0;
1153                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1154                         if (li->type != types[i])
1155                                 continue;
1156                         cur->Pid = cpu_to_le16(li->pid);
1157                         cur->LengthLow = cpu_to_le32((u32)li->length);
1158                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1159                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1160                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1161                         if (++num == max_num) {
1162                                 stored_rc = cifs_lockv(xid, tcon,
1163                                                        cfile->fid.netfid,
1164                                                        (__u8)li->type, 0, num,
1165                                                        buf);
1166                                 if (stored_rc)
1167                                         rc = stored_rc;
1168                                 cur = buf;
1169                                 num = 0;
1170                         } else
1171                                 cur++;
1172                 }
1173
1174                 if (num) {
1175                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1176                                                (__u8)types[i], 0, num, buf);
1177                         if (stored_rc)
1178                                 rc = stored_rc;
1179                 }
1180         }
1181
1182         kfree(buf);
1183         free_xid(xid);
1184         return rc;
1185 }
1186
1187 static __u32
1188 hash_lockowner(fl_owner_t owner)
1189 {
1190         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1191 }
1192
1193 struct lock_to_push {
1194         struct list_head llist;
1195         __u64 offset;
1196         __u64 length;
1197         __u32 pid;
1198         __u16 netfid;
1199         __u8 type;
1200 };
1201
1202 static int
1203 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1204 {
1205         struct inode *inode = d_inode(cfile->dentry);
1206         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1207         struct file_lock *flock;
1208         struct file_lock_context *flctx = inode->i_flctx;
1209         unsigned int count = 0, i;
1210         int rc = 0, xid, type;
1211         struct list_head locks_to_send, *el;
1212         struct lock_to_push *lck, *tmp;
1213         __u64 length;
1214
1215         xid = get_xid();
1216
1217         if (!flctx)
1218                 goto out;
1219
1220         spin_lock(&flctx->flc_lock);
1221         list_for_each(el, &flctx->flc_posix) {
1222                 count++;
1223         }
1224         spin_unlock(&flctx->flc_lock);
1225
1226         INIT_LIST_HEAD(&locks_to_send);
1227
1228         /*
1229          * Allocating count locks is enough because no FL_POSIX locks can be
1230          * added to the list while we are holding cinode->lock_sem that
1231          * protects locking operations of this inode.
1232          */
1233         for (i = 0; i < count; i++) {
1234                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1235                 if (!lck) {
1236                         rc = -ENOMEM;
1237                         goto err_out;
1238                 }
1239                 list_add_tail(&lck->llist, &locks_to_send);
1240         }
1241
1242         el = locks_to_send.next;
1243         spin_lock(&flctx->flc_lock);
1244         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1245                 if (el == &locks_to_send) {
1246                         /*
1247                          * The list ended. We don't have enough allocated
1248                          * structures - something is really wrong.
1249                          */
1250                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1251                         break;
1252                 }
1253                 length = 1 + flock->fl_end - flock->fl_start;
1254                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1255                         type = CIFS_RDLCK;
1256                 else
1257                         type = CIFS_WRLCK;
1258                 lck = list_entry(el, struct lock_to_push, llist);
1259                 lck->pid = hash_lockowner(flock->fl_owner);
1260                 lck->netfid = cfile->fid.netfid;
1261                 lck->length = length;
1262                 lck->type = type;
1263                 lck->offset = flock->fl_start;
1264         }
1265         spin_unlock(&flctx->flc_lock);
1266
1267         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1268                 int stored_rc;
1269
1270                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1271                                              lck->offset, lck->length, NULL,
1272                                              lck->type, 0);
1273                 if (stored_rc)
1274                         rc = stored_rc;
1275                 list_del(&lck->llist);
1276                 kfree(lck);
1277         }
1278
1279 out:
1280         free_xid(xid);
1281         return rc;
1282 err_out:
1283         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1284                 list_del(&lck->llist);
1285                 kfree(lck);
1286         }
1287         goto out;
1288 }
1289
1290 static int
1291 cifs_push_locks(struct cifsFileInfo *cfile)
1292 {
1293         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1294         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1295         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1296         int rc = 0;
1297
1298         /* we are going to update can_cache_brlcks here - need a write access */
1299         down_write(&cinode->lock_sem);
1300         if (!cinode->can_cache_brlcks) {
1301                 up_write(&cinode->lock_sem);
1302                 return rc;
1303         }
1304
1305         if (cap_unix(tcon->ses) &&
1306             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1307             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1308                 rc = cifs_push_posix_locks(cfile);
1309         else
1310                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1311
1312         cinode->can_cache_brlcks = false;
1313         up_write(&cinode->lock_sem);
1314         return rc;
1315 }
1316
1317 static void
1318 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1319                 bool *wait_flag, struct TCP_Server_Info *server)
1320 {
1321         if (flock->fl_flags & FL_POSIX)
1322                 cifs_dbg(FYI, "Posix\n");
1323         if (flock->fl_flags & FL_FLOCK)
1324                 cifs_dbg(FYI, "Flock\n");
1325         if (flock->fl_flags & FL_SLEEP) {
1326                 cifs_dbg(FYI, "Blocking lock\n");
1327                 *wait_flag = true;
1328         }
1329         if (flock->fl_flags & FL_ACCESS)
1330                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1331         if (flock->fl_flags & FL_LEASE)
1332                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1333         if (flock->fl_flags &
1334             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1335                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1336                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1337
1338         *type = server->vals->large_lock_type;
1339         if (flock->fl_type == F_WRLCK) {
1340                 cifs_dbg(FYI, "F_WRLCK\n");
1341                 *type |= server->vals->exclusive_lock_type;
1342                 *lock = 1;
1343         } else if (flock->fl_type == F_UNLCK) {
1344                 cifs_dbg(FYI, "F_UNLCK\n");
1345                 *type |= server->vals->unlock_lock_type;
1346                 *unlock = 1;
1347                 /* Check if unlock includes more than one lock range */
1348         } else if (flock->fl_type == F_RDLCK) {
1349                 cifs_dbg(FYI, "F_RDLCK\n");
1350                 *type |= server->vals->shared_lock_type;
1351                 *lock = 1;
1352         } else if (flock->fl_type == F_EXLCK) {
1353                 cifs_dbg(FYI, "F_EXLCK\n");
1354                 *type |= server->vals->exclusive_lock_type;
1355                 *lock = 1;
1356         } else if (flock->fl_type == F_SHLCK) {
1357                 cifs_dbg(FYI, "F_SHLCK\n");
1358                 *type |= server->vals->shared_lock_type;
1359                 *lock = 1;
1360         } else
1361                 cifs_dbg(FYI, "Unknown type of lock\n");
1362 }
1363
1364 static int
1365 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1366            bool wait_flag, bool posix_lck, unsigned int xid)
1367 {
1368         int rc = 0;
1369         __u64 length = 1 + flock->fl_end - flock->fl_start;
1370         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1371         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1372         struct TCP_Server_Info *server = tcon->ses->server;
1373         __u16 netfid = cfile->fid.netfid;
1374
1375         if (posix_lck) {
1376                 int posix_lock_type;
1377
1378                 rc = cifs_posix_lock_test(file, flock);
1379                 if (!rc)
1380                         return rc;
1381
1382                 if (type & server->vals->shared_lock_type)
1383                         posix_lock_type = CIFS_RDLCK;
1384                 else
1385                         posix_lock_type = CIFS_WRLCK;
1386                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1387                                       hash_lockowner(flock->fl_owner),
1388                                       flock->fl_start, length, flock,
1389                                       posix_lock_type, wait_flag);
1390                 return rc;
1391         }
1392
1393         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1394         if (!rc)
1395                 return rc;
1396
1397         /* BB we could chain these into one lock request BB */
1398         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1399                                     1, 0, false);
1400         if (rc == 0) {
1401                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1402                                             type, 0, 1, false);
1403                 flock->fl_type = F_UNLCK;
1404                 if (rc != 0)
1405                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1406                                  rc);
1407                 return 0;
1408         }
1409
1410         if (type & server->vals->shared_lock_type) {
1411                 flock->fl_type = F_WRLCK;
1412                 return 0;
1413         }
1414
1415         type &= ~server->vals->exclusive_lock_type;
1416
1417         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1418                                     type | server->vals->shared_lock_type,
1419                                     1, 0, false);
1420         if (rc == 0) {
1421                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1422                         type | server->vals->shared_lock_type, 0, 1, false);
1423                 flock->fl_type = F_RDLCK;
1424                 if (rc != 0)
1425                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1426                                  rc);
1427         } else
1428                 flock->fl_type = F_WRLCK;
1429
1430         return 0;
1431 }
1432
1433 void
1434 cifs_move_llist(struct list_head *source, struct list_head *dest)
1435 {
1436         struct list_head *li, *tmp;
1437         list_for_each_safe(li, tmp, source)
1438                 list_move(li, dest);
1439 }
1440
1441 void
1442 cifs_free_llist(struct list_head *llist)
1443 {
1444         struct cifsLockInfo *li, *tmp;
1445         list_for_each_entry_safe(li, tmp, llist, llist) {
1446                 cifs_del_lock_waiters(li);
1447                 list_del(&li->llist);
1448                 kfree(li);
1449         }
1450 }
1451
1452 int
1453 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1454                   unsigned int xid)
1455 {
1456         int rc = 0, stored_rc;
1457         static const int types[] = {
1458                 LOCKING_ANDX_LARGE_FILES,
1459                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1460         };
1461         unsigned int i;
1462         unsigned int max_num, num, max_buf;
1463         LOCKING_ANDX_RANGE *buf, *cur;
1464         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1465         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1466         struct cifsLockInfo *li, *tmp;
1467         __u64 length = 1 + flock->fl_end - flock->fl_start;
1468         struct list_head tmp_llist;
1469
1470         INIT_LIST_HEAD(&tmp_llist);
1471
1472         /*
1473          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1474          * and check it for zero before using.
1475          */
1476         max_buf = tcon->ses->server->maxBuf;
1477         if (!max_buf)
1478                 return -EINVAL;
1479
1480         max_num = (max_buf - sizeof(struct smb_hdr)) /
1481                                                 sizeof(LOCKING_ANDX_RANGE);
1482         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1483         if (!buf)
1484                 return -ENOMEM;
1485
1486         down_write(&cinode->lock_sem);
1487         for (i = 0; i < 2; i++) {
1488                 cur = buf;
1489                 num = 0;
1490                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1491                         if (flock->fl_start > li->offset ||
1492                             (flock->fl_start + length) <
1493                             (li->offset + li->length))
1494                                 continue;
1495                         if (current->tgid != li->pid)
1496                                 continue;
1497                         if (types[i] != li->type)
1498                                 continue;
1499                         if (cinode->can_cache_brlcks) {
1500                                 /*
1501                                  * We can cache brlock requests - simply remove
1502                                  * a lock from the file's list.
1503                                  */
1504                                 list_del(&li->llist);
1505                                 cifs_del_lock_waiters(li);
1506                                 kfree(li);
1507                                 continue;
1508                         }
1509                         cur->Pid = cpu_to_le16(li->pid);
1510                         cur->LengthLow = cpu_to_le32((u32)li->length);
1511                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1512                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1513                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1514                         /*
1515                          * We need to save a lock here to let us add it again to
1516                          * the file's list if the unlock range request fails on
1517                          * the server.
1518                          */
1519                         list_move(&li->llist, &tmp_llist);
1520                         if (++num == max_num) {
1521                                 stored_rc = cifs_lockv(xid, tcon,
1522                                                        cfile->fid.netfid,
1523                                                        li->type, num, 0, buf);
1524                                 if (stored_rc) {
1525                                         /*
1526                                          * We failed on the unlock range
1527                                          * request - add all locks from the tmp
1528                                          * list to the head of the file's list.
1529                                          */
1530                                         cifs_move_llist(&tmp_llist,
1531                                                         &cfile->llist->locks);
1532                                         rc = stored_rc;
1533                                 } else
1534                                         /*
1535                                          * The unlock range request succeed -
1536                                          * free the tmp list.
1537                                          */
1538                                         cifs_free_llist(&tmp_llist);
1539                                 cur = buf;
1540                                 num = 0;
1541                         } else
1542                                 cur++;
1543                 }
1544                 if (num) {
1545                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1546                                                types[i], num, 0, buf);
1547                         if (stored_rc) {
1548                                 cifs_move_llist(&tmp_llist,
1549                                                 &cfile->llist->locks);
1550                                 rc = stored_rc;
1551                         } else
1552                                 cifs_free_llist(&tmp_llist);
1553                 }
1554         }
1555
1556         up_write(&cinode->lock_sem);
1557         kfree(buf);
1558         return rc;
1559 }
1560
1561 static int
1562 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1563            bool wait_flag, bool posix_lck, int lock, int unlock,
1564            unsigned int xid)
1565 {
1566         int rc = 0;
1567         __u64 length = 1 + flock->fl_end - flock->fl_start;
1568         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1569         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1570         struct TCP_Server_Info *server = tcon->ses->server;
1571         struct inode *inode = d_inode(cfile->dentry);
1572
1573         if (posix_lck) {
1574                 int posix_lock_type;
1575
1576                 rc = cifs_posix_lock_set(file, flock);
1577                 if (!rc || rc < 0)
1578                         return rc;
1579
1580                 if (type & server->vals->shared_lock_type)
1581                         posix_lock_type = CIFS_RDLCK;
1582                 else
1583                         posix_lock_type = CIFS_WRLCK;
1584
1585                 if (unlock == 1)
1586                         posix_lock_type = CIFS_UNLCK;
1587
1588                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1589                                       hash_lockowner(flock->fl_owner),
1590                                       flock->fl_start, length,
1591                                       NULL, posix_lock_type, wait_flag);
1592                 goto out;
1593         }
1594
1595         if (lock) {
1596                 struct cifsLockInfo *lock;
1597
1598                 lock = cifs_lock_init(flock->fl_start, length, type,
1599                                       flock->fl_flags);
1600                 if (!lock)
1601                         return -ENOMEM;
1602
1603                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1604                 if (rc < 0) {
1605                         kfree(lock);
1606                         return rc;
1607                 }
1608                 if (!rc)
1609                         goto out;
1610
1611                 /*
1612                  * Windows 7 server can delay breaking lease from read to None
1613                  * if we set a byte-range lock on a file - break it explicitly
1614                  * before sending the lock to the server to be sure the next
1615                  * read won't conflict with non-overlapted locks due to
1616                  * pagereading.
1617                  */
1618                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1619                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1620                         cifs_zap_mapping(inode);
1621                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1622                                  inode);
1623                         CIFS_I(inode)->oplock = 0;
1624                 }
1625
1626                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1627                                             type, 1, 0, wait_flag);
1628                 if (rc) {
1629                         kfree(lock);
1630                         return rc;
1631                 }
1632
1633                 cifs_lock_add(cfile, lock);
1634         } else if (unlock)
1635                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1636
1637 out:
1638         if (flock->fl_flags & FL_POSIX && !rc)
1639                 rc = locks_lock_file_wait(file, flock);
1640         return rc;
1641 }
1642
1643 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1644 {
1645         int rc, xid;
1646         int lock = 0, unlock = 0;
1647         bool wait_flag = false;
1648         bool posix_lck = false;
1649         struct cifs_sb_info *cifs_sb;
1650         struct cifs_tcon *tcon;
1651         struct cifsInodeInfo *cinode;
1652         struct cifsFileInfo *cfile;
1653         __u16 netfid;
1654         __u32 type;
1655
1656         rc = -EACCES;
1657         xid = get_xid();
1658
1659         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1660                  cmd, flock->fl_flags, flock->fl_type,
1661                  flock->fl_start, flock->fl_end);
1662
1663         cfile = (struct cifsFileInfo *)file->private_data;
1664         tcon = tlink_tcon(cfile->tlink);
1665
1666         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1667                         tcon->ses->server);
1668         cifs_sb = CIFS_FILE_SB(file);
1669         netfid = cfile->fid.netfid;
1670         cinode = CIFS_I(file_inode(file));
1671
1672         if (cap_unix(tcon->ses) &&
1673             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1674             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1675                 posix_lck = true;
1676         /*
1677          * BB add code here to normalize offset and length to account for
1678          * negative length which we can not accept over the wire.
1679          */
1680         if (IS_GETLK(cmd)) {
1681                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1682                 free_xid(xid);
1683                 return rc;
1684         }
1685
1686         if (!lock && !unlock) {
1687                 /*
1688                  * if no lock or unlock then nothing to do since we do not
1689                  * know what it is
1690                  */
1691                 free_xid(xid);
1692                 return -EOPNOTSUPP;
1693         }
1694
1695         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1696                         xid);
1697         free_xid(xid);
1698         return rc;
1699 }
1700
1701 /*
1702  * update the file size (if needed) after a write. Should be called with
1703  * the inode->i_lock held
1704  */
1705 void
1706 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1707                       unsigned int bytes_written)
1708 {
1709         loff_t end_of_write = offset + bytes_written;
1710
1711         if (end_of_write > cifsi->server_eof)
1712                 cifsi->server_eof = end_of_write;
1713 }
1714
1715 static ssize_t
1716 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1717            size_t write_size, loff_t *offset)
1718 {
1719         int rc = 0;
1720         unsigned int bytes_written = 0;
1721         unsigned int total_written;
1722         struct cifs_sb_info *cifs_sb;
1723         struct cifs_tcon *tcon;
1724         struct TCP_Server_Info *server;
1725         unsigned int xid;
1726         struct dentry *dentry = open_file->dentry;
1727         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1728         struct cifs_io_parms io_parms;
1729
1730         cifs_sb = CIFS_SB(dentry->d_sb);
1731
1732         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1733                  write_size, *offset, dentry);
1734
1735         tcon = tlink_tcon(open_file->tlink);
1736         server = tcon->ses->server;
1737
1738         if (!server->ops->sync_write)
1739                 return -ENOSYS;
1740
1741         xid = get_xid();
1742
1743         for (total_written = 0; write_size > total_written;
1744              total_written += bytes_written) {
1745                 rc = -EAGAIN;
1746                 while (rc == -EAGAIN) {
1747                         struct kvec iov[2];
1748                         unsigned int len;
1749
1750                         if (open_file->invalidHandle) {
1751                                 /* we could deadlock if we called
1752                                    filemap_fdatawait from here so tell
1753                                    reopen_file not to flush data to
1754                                    server now */
1755                                 rc = cifs_reopen_file(open_file, false);
1756                                 if (rc != 0)
1757                                         break;
1758                         }
1759
1760                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1761                                   (unsigned int)write_size - total_written);
1762                         /* iov[0] is reserved for smb header */
1763                         iov[1].iov_base = (char *)write_data + total_written;
1764                         iov[1].iov_len = len;
1765                         io_parms.pid = pid;
1766                         io_parms.tcon = tcon;
1767                         io_parms.offset = *offset;
1768                         io_parms.length = len;
1769                         rc = server->ops->sync_write(xid, &open_file->fid,
1770                                         &io_parms, &bytes_written, iov, 1);
1771                 }
1772                 if (rc || (bytes_written == 0)) {
1773                         if (total_written)
1774                                 break;
1775                         else {
1776                                 free_xid(xid);
1777                                 return rc;
1778                         }
1779                 } else {
1780                         spin_lock(&d_inode(dentry)->i_lock);
1781                         cifs_update_eof(cifsi, *offset, bytes_written);
1782                         spin_unlock(&d_inode(dentry)->i_lock);
1783                         *offset += bytes_written;
1784                 }
1785         }
1786
1787         cifs_stats_bytes_written(tcon, total_written);
1788
1789         if (total_written > 0) {
1790                 spin_lock(&d_inode(dentry)->i_lock);
1791                 if (*offset > d_inode(dentry)->i_size)
1792                         i_size_write(d_inode(dentry), *offset);
1793                 spin_unlock(&d_inode(dentry)->i_lock);
1794         }
1795         mark_inode_dirty_sync(d_inode(dentry));
1796         free_xid(xid);
1797         return total_written;
1798 }
1799
1800 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1801                                         bool fsuid_only)
1802 {
1803         struct cifsFileInfo *open_file = NULL;
1804         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1805         struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1806
1807         /* only filter by fsuid on multiuser mounts */
1808         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1809                 fsuid_only = false;
1810
1811         spin_lock(&tcon->open_file_lock);
1812         /* we could simply get the first_list_entry since write-only entries
1813            are always at the end of the list but since the first entry might
1814            have a close pending, we go through the whole list */
1815         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1816                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1817                         continue;
1818                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1819                         if (!open_file->invalidHandle) {
1820                                 /* found a good file */
1821                                 /* lock it so it will not be closed on us */
1822                                 cifsFileInfo_get(open_file);
1823                                 spin_unlock(&tcon->open_file_lock);
1824                                 return open_file;
1825                         } /* else might as well continue, and look for
1826                              another, or simply have the caller reopen it
1827                              again rather than trying to fix this handle */
1828                 } else /* write only file */
1829                         break; /* write only files are last so must be done */
1830         }
1831         spin_unlock(&tcon->open_file_lock);
1832         return NULL;
1833 }
1834
1835 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1836                                         bool fsuid_only)
1837 {
1838         struct cifsFileInfo *open_file, *inv_file = NULL;
1839         struct cifs_sb_info *cifs_sb;
1840         struct cifs_tcon *tcon;
1841         bool any_available = false;
1842         int rc;
1843         unsigned int refind = 0;
1844
1845         /* Having a null inode here (because mapping->host was set to zero by
1846         the VFS or MM) should not happen but we had reports of on oops (due to
1847         it being zero) during stress testcases so we need to check for it */
1848
1849         if (cifs_inode == NULL) {
1850                 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1851                 dump_stack();
1852                 return NULL;
1853         }
1854
1855         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1856         tcon = cifs_sb_master_tcon(cifs_sb);
1857
1858         /* only filter by fsuid on multiuser mounts */
1859         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1860                 fsuid_only = false;
1861
1862         spin_lock(&tcon->open_file_lock);
1863 refind_writable:
1864         if (refind > MAX_REOPEN_ATT) {
1865                 spin_unlock(&tcon->open_file_lock);
1866                 return NULL;
1867         }
1868         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1869                 if (!any_available && open_file->pid != current->tgid)
1870                         continue;
1871                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1872                         continue;
1873                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1874                         if (!open_file->invalidHandle) {
1875                                 /* found a good writable file */
1876                                 cifsFileInfo_get(open_file);
1877                                 spin_unlock(&tcon->open_file_lock);
1878                                 return open_file;
1879                         } else {
1880                                 if (!inv_file)
1881                                         inv_file = open_file;
1882                         }
1883                 }
1884         }
1885         /* couldn't find useable FH with same pid, try any available */
1886         if (!any_available) {
1887                 any_available = true;
1888                 goto refind_writable;
1889         }
1890
1891         if (inv_file) {
1892                 any_available = false;
1893                 cifsFileInfo_get(inv_file);
1894         }
1895
1896         spin_unlock(&tcon->open_file_lock);
1897
1898         if (inv_file) {
1899                 rc = cifs_reopen_file(inv_file, false);
1900                 if (!rc)
1901                         return inv_file;
1902                 else {
1903                         spin_lock(&tcon->open_file_lock);
1904                         list_move_tail(&inv_file->flist,
1905                                         &cifs_inode->openFileList);
1906                         spin_unlock(&tcon->open_file_lock);
1907                         cifsFileInfo_put(inv_file);
1908                         ++refind;
1909                         inv_file = NULL;
1910                         spin_lock(&tcon->open_file_lock);
1911                         goto refind_writable;
1912                 }
1913         }
1914
1915         return NULL;
1916 }
1917
1918 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1919 {
1920         struct address_space *mapping = page->mapping;
1921         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1922         char *write_data;
1923         int rc = -EFAULT;
1924         int bytes_written = 0;
1925         struct inode *inode;
1926         struct cifsFileInfo *open_file;
1927
1928         if (!mapping || !mapping->host)
1929                 return -EFAULT;
1930
1931         inode = page->mapping->host;
1932
1933         offset += (loff_t)from;
1934         write_data = kmap(page);
1935         write_data += from;
1936
1937         if ((to > PAGE_SIZE) || (from > to)) {
1938                 kunmap(page);
1939                 return -EIO;
1940         }
1941
1942         /* racing with truncate? */
1943         if (offset > mapping->host->i_size) {
1944                 kunmap(page);
1945                 return 0; /* don't care */
1946         }
1947
1948         /* check to make sure that we are not extending the file */
1949         if (mapping->host->i_size - offset < (loff_t)to)
1950                 to = (unsigned)(mapping->host->i_size - offset);
1951
1952         open_file = find_writable_file(CIFS_I(mapping->host), false);
1953         if (open_file) {
1954                 bytes_written = cifs_write(open_file, open_file->pid,
1955                                            write_data, to - from, &offset);
1956                 cifsFileInfo_put(open_file);
1957                 /* Does mm or vfs already set times? */
1958                 inode->i_atime = inode->i_mtime = current_time(inode);
1959                 if ((bytes_written > 0) && (offset))
1960                         rc = 0;
1961                 else if (bytes_written < 0)
1962                         rc = bytes_written;
1963         } else {
1964                 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1965                 rc = -EIO;
1966         }
1967
1968         kunmap(page);
1969         return rc;
1970 }
1971
1972 static struct cifs_writedata *
1973 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1974                           pgoff_t end, pgoff_t *index,
1975                           unsigned int *found_pages)
1976 {
1977         struct cifs_writedata *wdata;
1978
1979         wdata = cifs_writedata_alloc((unsigned int)tofind,
1980                                      cifs_writev_complete);
1981         if (!wdata)
1982                 return NULL;
1983
1984         *found_pages = find_get_pages_range_tag(mapping, index, end,
1985                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
1986         return wdata;
1987 }
1988
1989 static unsigned int
1990 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1991                     struct address_space *mapping,
1992                     struct writeback_control *wbc,
1993                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1994 {
1995         unsigned int nr_pages = 0, i;
1996         struct page *page;
1997
1998         for (i = 0; i < found_pages; i++) {
1999                 page = wdata->pages[i];
2000                 /*
2001                  * At this point we hold neither the i_pages lock nor the
2002                  * page lock: the page may be truncated or invalidated
2003                  * (changing page->mapping to NULL), or even swizzled
2004                  * back from swapper_space to tmpfs file mapping
2005                  */
2006
2007                 if (nr_pages == 0)
2008                         lock_page(page);
2009                 else if (!trylock_page(page))
2010                         break;
2011
2012                 if (unlikely(page->mapping != mapping)) {
2013                         unlock_page(page);
2014                         break;
2015                 }
2016
2017                 if (!wbc->range_cyclic && page->index > end) {
2018                         *done = true;
2019                         unlock_page(page);
2020                         break;
2021                 }
2022
2023                 if (*next && (page->index != *next)) {
2024                         /* Not next consecutive page */
2025                         unlock_page(page);
2026                         break;
2027                 }
2028
2029                 if (wbc->sync_mode != WB_SYNC_NONE)
2030                         wait_on_page_writeback(page);
2031
2032                 if (PageWriteback(page) ||
2033                                 !clear_page_dirty_for_io(page)) {
2034                         unlock_page(page);
2035                         break;
2036                 }
2037
2038                 /*
2039                  * This actually clears the dirty bit in the radix tree.
2040                  * See cifs_writepage() for more commentary.
2041                  */
2042                 set_page_writeback(page);
2043                 if (page_offset(page) >= i_size_read(mapping->host)) {
2044                         *done = true;
2045                         unlock_page(page);
2046                         end_page_writeback(page);
2047                         break;
2048                 }
2049
2050                 wdata->pages[i] = page;
2051                 *next = page->index + 1;
2052                 ++nr_pages;
2053         }
2054
2055         /* reset index to refind any pages skipped */
2056         if (nr_pages == 0)
2057                 *index = wdata->pages[0]->index + 1;
2058
2059         /* put any pages we aren't going to use */
2060         for (i = nr_pages; i < found_pages; i++) {
2061                 put_page(wdata->pages[i]);
2062                 wdata->pages[i] = NULL;
2063         }
2064
2065         return nr_pages;
2066 }
2067
2068 static int
2069 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2070                  struct address_space *mapping, struct writeback_control *wbc)
2071 {
2072         int rc = 0;
2073         struct TCP_Server_Info *server;
2074         unsigned int i;
2075
2076         wdata->sync_mode = wbc->sync_mode;
2077         wdata->nr_pages = nr_pages;
2078         wdata->offset = page_offset(wdata->pages[0]);
2079         wdata->pagesz = PAGE_SIZE;
2080         wdata->tailsz = min(i_size_read(mapping->host) -
2081                         page_offset(wdata->pages[nr_pages - 1]),
2082                         (loff_t)PAGE_SIZE);
2083         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2084
2085         if (wdata->cfile != NULL)
2086                 cifsFileInfo_put(wdata->cfile);
2087         wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2088         if (!wdata->cfile) {
2089                 cifs_dbg(VFS, "No writable handles for inode\n");
2090                 rc = -EBADF;
2091         } else {
2092                 wdata->pid = wdata->cfile->pid;
2093                 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2094                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2095         }
2096
2097         for (i = 0; i < nr_pages; ++i)
2098                 unlock_page(wdata->pages[i]);
2099
2100         return rc;
2101 }
2102
2103 static int cifs_writepages(struct address_space *mapping,
2104                            struct writeback_control *wbc)
2105 {
2106         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2107         struct TCP_Server_Info *server;
2108         bool done = false, scanned = false, range_whole = false;
2109         pgoff_t end, index;
2110         struct cifs_writedata *wdata;
2111         int rc = 0;
2112         unsigned int xid;
2113
2114         /*
2115          * If wsize is smaller than the page cache size, default to writing
2116          * one page at a time via cifs_writepage
2117          */
2118         if (cifs_sb->wsize < PAGE_SIZE)
2119                 return generic_writepages(mapping, wbc);
2120
2121         xid = get_xid();
2122         if (wbc->range_cyclic) {
2123                 index = mapping->writeback_index; /* Start from prev offset */
2124                 end = -1;
2125         } else {
2126                 index = wbc->range_start >> PAGE_SHIFT;
2127                 end = wbc->range_end >> PAGE_SHIFT;
2128                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2129                         range_whole = true;
2130                 scanned = true;
2131         }
2132         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2133 retry:
2134         while (!done && index <= end) {
2135                 unsigned int i, nr_pages, found_pages, wsize, credits;
2136                 pgoff_t next = 0, tofind, saved_index = index;
2137
2138                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2139                                                    &wsize, &credits);
2140                 if (rc)
2141                         break;
2142
2143                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2144
2145                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2146                                                   &found_pages);
2147                 if (!wdata) {
2148                         rc = -ENOMEM;
2149                         add_credits_and_wake_if(server, credits, 0);
2150                         break;
2151                 }
2152
2153                 if (found_pages == 0) {
2154                         kref_put(&wdata->refcount, cifs_writedata_release);
2155                         add_credits_and_wake_if(server, credits, 0);
2156                         break;
2157                 }
2158
2159                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2160                                                end, &index, &next, &done);
2161
2162                 /* nothing to write? */
2163                 if (nr_pages == 0) {
2164                         kref_put(&wdata->refcount, cifs_writedata_release);
2165                         add_credits_and_wake_if(server, credits, 0);
2166                         continue;
2167                 }
2168
2169                 wdata->credits = credits;
2170
2171                 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2172
2173                 /* send failure -- clean up the mess */
2174                 if (rc != 0) {
2175                         add_credits_and_wake_if(server, wdata->credits, 0);
2176                         for (i = 0; i < nr_pages; ++i) {
2177                                 if (rc == -EAGAIN)
2178                                         redirty_page_for_writepage(wbc,
2179                                                            wdata->pages[i]);
2180                                 else
2181                                         SetPageError(wdata->pages[i]);
2182                                 end_page_writeback(wdata->pages[i]);
2183                                 put_page(wdata->pages[i]);
2184                         }
2185                         if (rc != -EAGAIN)
2186                                 mapping_set_error(mapping, rc);
2187                 }
2188                 kref_put(&wdata->refcount, cifs_writedata_release);
2189
2190                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2191                         index = saved_index;
2192                         continue;
2193                 }
2194
2195                 wbc->nr_to_write -= nr_pages;
2196                 if (wbc->nr_to_write <= 0)
2197                         done = true;
2198
2199                 index = next;
2200         }
2201
2202         if (!scanned && !done) {
2203                 /*
2204                  * We hit the last page and there is more work to be done: wrap
2205                  * back to the start of the file
2206                  */
2207                 scanned = true;
2208                 index = 0;
2209                 goto retry;
2210         }
2211
2212         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2213                 mapping->writeback_index = index;
2214
2215         free_xid(xid);
2216         return rc;
2217 }
2218
2219 static int
2220 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2221 {
2222         int rc;
2223         unsigned int xid;
2224
2225         xid = get_xid();
2226 /* BB add check for wbc flags */
2227         get_page(page);
2228         if (!PageUptodate(page))
2229                 cifs_dbg(FYI, "ppw - page not up to date\n");
2230
2231         /*
2232          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2233          *
2234          * A writepage() implementation always needs to do either this,
2235          * or re-dirty the page with "redirty_page_for_writepage()" in
2236          * the case of a failure.
2237          *
2238          * Just unlocking the page will cause the radix tree tag-bits
2239          * to fail to update with the state of the page correctly.
2240          */
2241         set_page_writeback(page);
2242 retry_write:
2243         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2244         if (rc == -EAGAIN) {
2245                 if (wbc->sync_mode == WB_SYNC_ALL)
2246                         goto retry_write;
2247                 redirty_page_for_writepage(wbc, page);
2248         } else if (rc != 0) {
2249                 SetPageError(page);
2250                 mapping_set_error(page->mapping, rc);
2251         } else {
2252                 SetPageUptodate(page);
2253         }
2254         end_page_writeback(page);
2255         put_page(page);
2256         free_xid(xid);
2257         return rc;
2258 }
2259
/*
 * Write @page with cifs_writepage_locked(), then unlock it.  The result of
 * the write is returned unchanged.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);

	return rc;
}
2266
2267 static int cifs_write_end(struct file *file, struct address_space *mapping,
2268                         loff_t pos, unsigned len, unsigned copied,
2269                         struct page *page, void *fsdata)
2270 {
2271         int rc;
2272         struct inode *inode = mapping->host;
2273         struct cifsFileInfo *cfile = file->private_data;
2274         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2275         __u32 pid;
2276
2277         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2278                 pid = cfile->pid;
2279         else
2280                 pid = current->tgid;
2281
2282         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2283                  page, pos, copied);
2284
2285         if (PageChecked(page)) {
2286                 if (copied == len)
2287                         SetPageUptodate(page);
2288                 ClearPageChecked(page);
2289         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2290                 SetPageUptodate(page);
2291
2292         if (!PageUptodate(page)) {
2293                 char *page_data;
2294                 unsigned offset = pos & (PAGE_SIZE - 1);
2295                 unsigned int xid;
2296
2297                 xid = get_xid();
2298                 /* this is probably better than directly calling
2299                    partialpage_write since in this function the file handle is
2300                    known which we might as well leverage */
2301                 /* BB check if anything else missing out of ppw
2302                    such as updating last write time */
2303                 page_data = kmap(page);
2304                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2305                 /* if (rc < 0) should we set writebehind rc? */
2306                 kunmap(page);
2307
2308                 free_xid(xid);
2309         } else {
2310                 rc = copied;
2311                 pos += copied;
2312                 set_page_dirty(page);
2313         }
2314
2315         if (rc > 0) {
2316                 spin_lock(&inode->i_lock);
2317                 if (pos > inode->i_size)
2318                         i_size_write(inode, pos);
2319                 spin_unlock(&inode->i_lock);
2320         }
2321
2322         unlock_page(page);
2323         put_page(page);
2324
2325         return rc;
2326 }
2327
2328 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2329                       int datasync)
2330 {
2331         unsigned int xid;
2332         int rc = 0;
2333         struct cifs_tcon *tcon;
2334         struct TCP_Server_Info *server;
2335         struct cifsFileInfo *smbfile = file->private_data;
2336         struct inode *inode = file_inode(file);
2337         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2338
2339         rc = file_write_and_wait_range(file, start, end);
2340         if (rc)
2341                 return rc;
2342         inode_lock(inode);
2343
2344         xid = get_xid();
2345
2346         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2347                  file, datasync);
2348
2349         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2350                 rc = cifs_zap_mapping(inode);
2351                 if (rc) {
2352                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2353                         rc = 0; /* don't care about it in fsync */
2354                 }
2355         }
2356
2357         tcon = tlink_tcon(smbfile->tlink);
2358         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2359                 server = tcon->ses->server;
2360                 if (server->ops->flush)
2361                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2362                 else
2363                         rc = -ENOSYS;
2364         }
2365
2366         free_xid(xid);
2367         inode_unlock(inode);
2368         return rc;
2369 }
2370
2371 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2372 {
2373         unsigned int xid;
2374         int rc = 0;
2375         struct cifs_tcon *tcon;
2376         struct TCP_Server_Info *server;
2377         struct cifsFileInfo *smbfile = file->private_data;
2378         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2379         struct inode *inode = file->f_mapping->host;
2380
2381         rc = file_write_and_wait_range(file, start, end);
2382         if (rc)
2383                 return rc;
2384         inode_lock(inode);
2385
2386         xid = get_xid();
2387
2388         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2389                  file, datasync);
2390
2391         tcon = tlink_tcon(smbfile->tlink);
2392         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2393                 server = tcon->ses->server;
2394                 if (server->ops->flush)
2395                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2396                 else
2397                         rc = -ENOSYS;
2398         }
2399
2400         free_xid(xid);
2401         inode_unlock(inode);
2402         return rc;
2403 }
2404
2405 /*
2406  * As file closes, flush all cached write data for this inode checking
2407  * for write behind errors.
2408  */
2409 int cifs_flush(struct file *file, fl_owner_t id)
2410 {
2411         struct inode *inode = file_inode(file);
2412         int rc = 0;
2413
2414         if (file->f_mode & FMODE_WRITE)
2415                 rc = filemap_write_and_wait(inode->i_mapping);
2416
2417         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2418
2419         return rc;
2420 }
2421
2422 static int
2423 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2424 {
2425         int rc = 0;
2426         unsigned long i;
2427
2428         for (i = 0; i < num_pages; i++) {
2429                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2430                 if (!pages[i]) {
2431                         /*
2432                          * save number of pages we have already allocated and
2433                          * return with ENOMEM error
2434                          */
2435                         num_pages = i;
2436                         rc = -ENOMEM;
2437                         break;
2438                 }
2439         }
2440
2441         if (rc) {
2442                 for (i = 0; i < num_pages; i++)
2443                         put_page(pages[i]);
2444         }
2445         return rc;
2446 }
2447
2448 static inline
2449 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2450 {
2451         size_t num_pages;
2452         size_t clen;
2453
2454         clen = min_t(const size_t, len, wsize);
2455         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2456
2457         if (cur_len)
2458                 *cur_len = clen;
2459
2460         return num_pages;
2461 }
2462
2463 static void
2464 cifs_uncached_writedata_release(struct kref *refcount)
2465 {
2466         int i;
2467         struct cifs_writedata *wdata = container_of(refcount,
2468                                         struct cifs_writedata, refcount);
2469
2470         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2471         for (i = 0; i < wdata->nr_pages; i++)
2472                 put_page(wdata->pages[i]);
2473         cifs_writedata_release(refcount);
2474 }
2475
2476 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2477
2478 static void
2479 cifs_uncached_writev_complete(struct work_struct *work)
2480 {
2481         struct cifs_writedata *wdata = container_of(work,
2482                                         struct cifs_writedata, work);
2483         struct inode *inode = d_inode(wdata->cfile->dentry);
2484         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2485
2486         spin_lock(&inode->i_lock);
2487         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2488         if (cifsi->server_eof > inode->i_size)
2489                 i_size_write(inode, cifsi->server_eof);
2490         spin_unlock(&inode->i_lock);
2491
2492         complete(&wdata->done);
2493         collect_uncached_write_data(wdata->ctx);
2494         /* the below call can possibly free the last ref to aio ctx */
2495         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2496 }
2497
2498 static int
2499 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2500                       size_t *len, unsigned long *num_pages)
2501 {
2502         size_t save_len, copied, bytes, cur_len = *len;
2503         unsigned long i, nr_pages = *num_pages;
2504
2505         save_len = cur_len;
2506         for (i = 0; i < nr_pages; i++) {
2507                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2508                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2509                 cur_len -= copied;
2510                 /*
2511                  * If we didn't copy as much as we expected, then that
2512                  * may mean we trod into an unmapped area. Stop copying
2513                  * at that point. On the next pass through the big
2514                  * loop, we'll likely end up getting a zero-length
2515                  * write and bailing out of it.
2516                  */
2517                 if (copied < bytes)
2518                         break;
2519         }
2520         cur_len = save_len - cur_len;
2521         *len = cur_len;
2522
2523         /*
2524          * If we have no data to send, then that probably means that
2525          * the copy above failed altogether. That's most likely because
2526          * the address in the iovec was bogus. Return -EFAULT and let
2527          * the caller free anything we allocated and bail out.
2528          */
2529         if (!cur_len)
2530                 return -EFAULT;
2531
2532         /*
2533          * i + 1 now represents the number of pages we actually used in
2534          * the copy phase above.
2535          */
2536         *num_pages = i + 1;
2537         return 0;
2538 }
2539
2540 static int
2541 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2542         struct cifs_aio_ctx *ctx)
2543 {
2544         int wait_retry = 0;
2545         unsigned int wsize, credits;
2546         int rc;
2547         struct TCP_Server_Info *server =
2548                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2549
2550         /*
2551          * Try to resend this wdata, waiting for credits up to 3 seconds.
2552          * Note: we are attempting to resend the whole wdata not in segments
2553          */
2554         do {
2555                 rc = server->ops->wait_mtu_credits(
2556                         server, wdata->bytes, &wsize, &credits);
2557
2558                 if (rc)
2559                         break;
2560
2561                 if (wsize < wdata->bytes) {
2562                         add_credits_and_wake_if(server, credits, 0);
2563                         msleep(1000);
2564                         wait_retry++;
2565                 }
2566         } while (wsize < wdata->bytes && wait_retry < 3);
2567
2568         if (wsize < wdata->bytes) {
2569                 rc = -EBUSY;
2570                 goto out;
2571         }
2572
2573         rc = -EAGAIN;
2574         while (rc == -EAGAIN) {
2575                 rc = 0;
2576                 if (wdata->cfile->invalidHandle)
2577                         rc = cifs_reopen_file(wdata->cfile, false);
2578                 if (!rc)
2579                         rc = server->ops->async_writev(wdata,
2580                                         cifs_uncached_writedata_release);
2581         }
2582
2583         if (!rc) {
2584                 list_add_tail(&wdata->list, wdata_list);
2585                 return 0;
2586         }
2587
2588         add_credits_and_wake_if(server, wdata->credits, 0);
2589 out:
2590         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2591
2592         return rc;
2593 }
2594
2595 static int
2596 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2597                      struct cifsFileInfo *open_file,
2598                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2599                      struct cifs_aio_ctx *ctx)
2600 {
2601         int rc = 0;
2602         size_t cur_len;
2603         unsigned long nr_pages, num_pages, i;
2604         struct cifs_writedata *wdata;
2605         struct iov_iter saved_from = *from;
2606         loff_t saved_offset = offset;
2607         pid_t pid;
2608         struct TCP_Server_Info *server;
2609         struct page **pagevec;
2610         size_t start;
2611
2612         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2613                 pid = open_file->pid;
2614         else
2615                 pid = current->tgid;
2616
2617         server = tlink_tcon(open_file->tlink)->ses->server;
2618
2619         do {
2620                 unsigned int wsize, credits;
2621
2622                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2623                                                    &wsize, &credits);
2624                 if (rc)
2625                         break;
2626
2627                 if (ctx->direct_io) {
2628                         ssize_t result;
2629
2630                         result = iov_iter_get_pages_alloc(
2631                                 from, &pagevec, wsize, &start);
2632                         if (result < 0) {
2633                                 cifs_dbg(VFS,
2634                                         "direct_writev couldn't get user pages "
2635                                         "(rc=%zd) iter type %d iov_offset %zd "
2636                                         "count %zd\n",
2637                                         result, from->type,
2638                                         from->iov_offset, from->count);
2639                                 dump_stack();
2640                                 break;
2641                         }
2642                         cur_len = (size_t)result;
2643                         iov_iter_advance(from, cur_len);
2644
2645                         nr_pages =
2646                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2647
2648                         wdata = cifs_writedata_direct_alloc(pagevec,
2649                                              cifs_uncached_writev_complete);
2650                         if (!wdata) {
2651                                 rc = -ENOMEM;
2652                                 add_credits_and_wake_if(server, credits, 0);
2653                                 break;
2654                         }
2655
2656
2657                         wdata->page_offset = start;
2658                         wdata->tailsz =
2659                                 nr_pages > 1 ?
2660                                         cur_len - (PAGE_SIZE - start) -
2661                                         (nr_pages - 2) * PAGE_SIZE :
2662                                         cur_len;
2663                 } else {
2664                         nr_pages = get_numpages(wsize, len, &cur_len);
2665                         wdata = cifs_writedata_alloc(nr_pages,
2666                                              cifs_uncached_writev_complete);
2667                         if (!wdata) {
2668                                 rc = -ENOMEM;
2669                                 add_credits_and_wake_if(server, credits, 0);
2670                                 break;
2671                         }
2672
2673                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2674                         if (rc) {
2675                                 kfree(wdata);
2676                                 add_credits_and_wake_if(server, credits, 0);
2677                                 break;
2678                         }
2679
2680                         num_pages = nr_pages;
2681                         rc = wdata_fill_from_iovec(
2682                                 wdata, from, &cur_len, &num_pages);
2683                         if (rc) {
2684                                 for (i = 0; i < nr_pages; i++)
2685                                         put_page(wdata->pages[i]);
2686                                 kfree(wdata);
2687                                 add_credits_and_wake_if(server, credits, 0);
2688                                 break;
2689                         }
2690
2691                         /*
2692                          * Bring nr_pages down to the number of pages we
2693                          * actually used, and free any pages that we didn't use.
2694                          */
2695                         for ( ; nr_pages > num_pages; nr_pages--)
2696                                 put_page(wdata->pages[nr_pages - 1]);
2697
2698                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2699                 }
2700
2701                 wdata->sync_mode = WB_SYNC_ALL;
2702                 wdata->nr_pages = nr_pages;
2703                 wdata->offset = (__u64)offset;
2704                 wdata->cfile = cifsFileInfo_get(open_file);
2705                 wdata->pid = pid;
2706                 wdata->bytes = cur_len;
2707                 wdata->pagesz = PAGE_SIZE;
2708                 wdata->credits = credits;
2709                 wdata->ctx = ctx;
2710                 kref_get(&ctx->refcount);
2711
2712                 if (!wdata->cfile->invalidHandle ||
2713                     !(rc = cifs_reopen_file(wdata->cfile, false)))
2714                         rc = server->ops->async_writev(wdata,
2715                                         cifs_uncached_writedata_release);
2716                 if (rc) {
2717                         add_credits_and_wake_if(server, wdata->credits, 0);
2718                         kref_put(&wdata->refcount,
2719                                  cifs_uncached_writedata_release);
2720                         if (rc == -EAGAIN) {
2721                                 *from = saved_from;
2722                                 iov_iter_advance(from, offset - saved_offset);
2723                                 continue;
2724                         }
2725                         break;
2726                 }
2727
2728                 list_add_tail(&wdata->list, wdata_list);
2729                 offset += cur_len;
2730                 len -= cur_len;
2731         } while (len > 0);
2732
2733         return rc;
2734 }
2735
2736 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2737 {
2738         struct cifs_writedata *wdata, *tmp;
2739         struct cifs_tcon *tcon;
2740         struct cifs_sb_info *cifs_sb;
2741         struct dentry *dentry = ctx->cfile->dentry;
2742         unsigned int i;
2743         int rc;
2744
2745         tcon = tlink_tcon(ctx->cfile->tlink);
2746         cifs_sb = CIFS_SB(dentry->d_sb);
2747
2748         mutex_lock(&ctx->aio_mutex);
2749
2750         if (list_empty(&ctx->list)) {
2751                 mutex_unlock(&ctx->aio_mutex);
2752                 return;
2753         }
2754
2755         rc = ctx->rc;
2756         /*
2757          * Wait for and collect replies for any successful sends in order of
2758          * increasing offset. Once an error is hit, then return without waiting
2759          * for any more replies.
2760          */
2761 restart_loop:
2762         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2763                 if (!rc) {
2764                         if (!try_wait_for_completion(&wdata->done)) {
2765                                 mutex_unlock(&ctx->aio_mutex);
2766                                 return;
2767                         }
2768
2769                         if (wdata->result)
2770                                 rc = wdata->result;
2771                         else
2772                                 ctx->total_len += wdata->bytes;
2773
2774                         /* resend call if it's a retryable error */
2775                         if (rc == -EAGAIN) {
2776                                 struct list_head tmp_list;
2777                                 struct iov_iter tmp_from = ctx->iter;
2778
2779                                 INIT_LIST_HEAD(&tmp_list);
2780                                 list_del_init(&wdata->list);
2781
2782                                 if (ctx->direct_io)
2783                                         rc = cifs_resend_wdata(
2784                                                 wdata, &tmp_list, ctx);
2785                                 else {
2786                                         iov_iter_advance(&tmp_from,
2787                                                  wdata->offset - ctx->pos);
2788
2789                                         rc = cifs_write_from_iter(wdata->offset,
2790                                                 wdata->bytes, &tmp_from,
2791                                                 ctx->cfile, cifs_sb, &tmp_list,
2792                                                 ctx);
2793                                 }
2794
2795                                 list_splice(&tmp_list, &ctx->list);
2796
2797                                 kref_put(&wdata->refcount,
2798                                          cifs_uncached_writedata_release);
2799                                 goto restart_loop;
2800                         }
2801                 }
2802                 list_del_init(&wdata->list);
2803                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2804         }
2805
2806         if (!ctx->direct_io)
2807                 for (i = 0; i < ctx->npages; i++)
2808                         put_page(ctx->bv[i].bv_page);
2809
2810         cifs_stats_bytes_written(tcon, ctx->total_len);
2811         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2812
2813         ctx->rc = (rc == 0) ? ctx->total_len : rc;
2814
2815         mutex_unlock(&ctx->aio_mutex);
2816
2817         if (ctx->iocb && ctx->iocb->ki_complete)
2818                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2819         else
2820                 complete(&ctx->done);
2821 }
2822
2823 static ssize_t __cifs_writev(
2824         struct kiocb *iocb, struct iov_iter *from, bool direct)
2825 {
2826         struct file *file = iocb->ki_filp;
2827         ssize_t total_written = 0;
2828         struct cifsFileInfo *cfile;
2829         struct cifs_tcon *tcon;
2830         struct cifs_sb_info *cifs_sb;
2831         struct cifs_aio_ctx *ctx;
2832         struct iov_iter saved_from = *from;
2833         size_t len = iov_iter_count(from);
2834         int rc;
2835
2836         /*
2837          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
2838          * In this case, fall back to non-direct write function.
2839          * this could be improved by getting pages directly in ITER_KVEC
2840          */
2841         if (direct && from->type & ITER_KVEC) {
2842                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
2843                 direct = false;
2844         }
2845
2846         rc = generic_write_checks(iocb, from);
2847         if (rc <= 0)
2848                 return rc;
2849
2850         cifs_sb = CIFS_FILE_SB(file);
2851         cfile = file->private_data;
2852         tcon = tlink_tcon(cfile->tlink);
2853
2854         if (!tcon->ses->server->ops->async_writev)
2855                 return -ENOSYS;
2856
2857         ctx = cifs_aio_ctx_alloc();
2858         if (!ctx)
2859                 return -ENOMEM;
2860
2861         ctx->cfile = cifsFileInfo_get(cfile);
2862
2863         if (!is_sync_kiocb(iocb))
2864                 ctx->iocb = iocb;
2865
2866         ctx->pos = iocb->ki_pos;
2867
2868         if (direct) {
2869                 ctx->direct_io = true;
2870                 ctx->iter = *from;
2871                 ctx->len = len;
2872         } else {
2873                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
2874                 if (rc) {
2875                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
2876                         return rc;
2877                 }
2878         }
2879
2880         /* grab a lock here due to read response handlers can access ctx */
2881         mutex_lock(&ctx->aio_mutex);
2882
2883         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2884                                   cfile, cifs_sb, &ctx->list, ctx);
2885
2886         /*
2887          * If at least one write was successfully sent, then discard any rc
2888          * value from the later writes. If the other write succeeds, then
2889          * we'll end up returning whatever was written. If it fails, then
2890          * we'll get a new rc value from that.
2891          */
2892         if (!list_empty(&ctx->list))
2893                 rc = 0;
2894
2895         mutex_unlock(&ctx->aio_mutex);
2896
2897         if (rc) {
2898                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2899                 return rc;
2900         }
2901
2902         if (!is_sync_kiocb(iocb)) {
2903                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2904                 return -EIOCBQUEUED;
2905         }
2906
2907         rc = wait_for_completion_killable(&ctx->done);
2908         if (rc) {
2909                 mutex_lock(&ctx->aio_mutex);
2910                 ctx->rc = rc = -EINTR;
2911                 total_written = ctx->total_len;
2912                 mutex_unlock(&ctx->aio_mutex);
2913         } else {
2914                 rc = ctx->rc;
2915                 total_written = ctx->total_len;
2916         }
2917
2918         kref_put(&ctx->refcount, cifs_aio_ctx_release);
2919
2920         if (unlikely(!total_written))
2921                 return rc;
2922
2923         iocb->ki_pos += total_written;
2924         return total_written;
2925 }
2926
2927 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
2928 {
2929         return __cifs_writev(iocb, from, true);
2930 }
2931
2932 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2933 {
2934         return __cifs_writev(iocb, from, false);
2935 }
2936
2937 static ssize_t
2938 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2939 {
2940         struct file *file = iocb->ki_filp;
2941         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2942         struct inode *inode = file->f_mapping->host;
2943         struct cifsInodeInfo *cinode = CIFS_I(inode);
2944         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2945         ssize_t rc;
2946
2947         inode_lock(inode);
2948         /*
2949          * We need to hold the sem to be sure nobody modifies lock list
2950          * with a brlock that prevents writing.
2951          */
2952         down_read(&cinode->lock_sem);
2953
2954         rc = generic_write_checks(iocb, from);
2955         if (rc <= 0)
2956                 goto out;
2957
2958         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2959                                      server->vals->exclusive_lock_type, 0,
2960                                      NULL, CIFS_WRITE_OP))
2961                 rc = __generic_file_write_iter(iocb, from);
2962         else
2963                 rc = -EACCES;
2964 out:
2965         up_read(&cinode->lock_sem);
2966         inode_unlock(inode);
2967
2968         if (rc > 0)
2969                 rc = generic_write_sync(iocb, rc);
2970         return rc;
2971 }
2972
2973 ssize_t
2974 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2975 {
2976         struct inode *inode = file_inode(iocb->ki_filp);
2977         struct cifsInodeInfo *cinode = CIFS_I(inode);
2978         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2979         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2980                                                 iocb->ki_filp->private_data;
2981         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2982         ssize_t written;
2983
2984         written = cifs_get_writer(cinode);
2985         if (written)
2986                 return written;
2987
2988         if (CIFS_CACHE_WRITE(cinode)) {
2989                 if (cap_unix(tcon->ses) &&
2990                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2991                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2992                         written = generic_file_write_iter(iocb, from);
2993                         goto out;
2994                 }
2995                 written = cifs_writev(iocb, from);
2996                 goto out;
2997         }
2998         /*
2999          * For non-oplocked files in strict cache mode we need to write the data
3000          * to the server exactly from the pos to pos+len-1 rather than flush all
3001          * affected pages because it may cause a error with mandatory locks on
3002          * these pages but not on the region from pos to ppos+len-1.
3003          */
3004         written = cifs_user_writev(iocb, from);
3005         if (written > 0 && CIFS_CACHE_READ(cinode)) {
3006                 /*
3007                  * Windows 7 server can delay breaking level2 oplock if a write
3008                  * request comes - break it on the client to prevent reading
3009                  * an old data.
3010                  */
3011                 cifs_zap_mapping(inode);
3012                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
3013                          inode);
3014                 cinode->oplock = 0;
3015         }
3016 out:
3017         cifs_put_writer(cinode);
3018         return written;
3019 }
3020
3021 static struct cifs_readdata *
3022 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3023 {
3024         struct cifs_readdata *rdata;
3025
3026         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3027         if (rdata != NULL) {
3028                 rdata->pages = pages;
3029                 kref_init(&rdata->refcount);
3030                 INIT_LIST_HEAD(&rdata->list);
3031                 init_completion(&rdata->done);
3032                 INIT_WORK(&rdata->work, complete);
3033         }
3034
3035         return rdata;
3036 }
3037
3038 static struct cifs_readdata *
3039 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3040 {
3041         struct page **pages =
3042                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3043         struct cifs_readdata *ret = NULL;
3044
3045         if (pages) {
3046                 ret = cifs_readdata_direct_alloc(pages, complete);
3047                 if (!ret)
3048                         kfree(pages);
3049         }
3050
3051         return ret;
3052 }
3053
3054 void
3055 cifs_readdata_release(struct kref *refcount)
3056 {
3057         struct cifs_readdata *rdata = container_of(refcount,
3058                                         struct cifs_readdata, refcount);
3059 #ifdef CONFIG_CIFS_SMB_DIRECT
3060         if (rdata->mr) {
3061                 smbd_deregister_mr(rdata->mr);
3062                 rdata->mr = NULL;
3063         }
3064 #endif
3065         if (rdata->cfile)
3066                 cifsFileInfo_put(rdata->cfile);
3067
3068         kvfree(rdata->pages);
3069         kfree(rdata);
3070 }
3071
3072 static int
3073 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3074 {
3075         int rc = 0;
3076         struct page *page;
3077         unsigned int i;
3078
3079         for (i = 0; i < nr_pages; i++) {
3080                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3081                 if (!page) {
3082                         rc = -ENOMEM;
3083                         break;
3084                 }
3085                 rdata->pages[i] = page;
3086         }
3087
3088         if (rc) {
3089                 for (i = 0; i < nr_pages; i++) {
3090                         put_page(rdata->pages[i]);
3091                         rdata->pages[i] = NULL;
3092                 }
3093         }
3094         return rc;
3095 }
3096
3097 static void
3098 cifs_uncached_readdata_release(struct kref *refcount)
3099 {
3100         struct cifs_readdata *rdata = container_of(refcount,
3101                                         struct cifs_readdata, refcount);
3102         unsigned int i;
3103
3104         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3105         for (i = 0; i < rdata->nr_pages; i++) {
3106                 put_page(rdata->pages[i]);
3107         }
3108         cifs_readdata_release(refcount);
3109 }
3110
3111 /**
3112  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3113  * @rdata:      the readdata response with list of pages holding data
3114  * @iter:       destination for our data
3115  *
3116  * This function copies data from a list of pages in a readdata response into
3117  * an array of iovecs. It will first calculate where the data should go
3118  * based on the info in the readdata and then copy the data into that spot.
3119  */
3120 static int
3121 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3122 {
3123         size_t remaining = rdata->got_bytes;
3124         unsigned int i;
3125
3126         for (i = 0; i < rdata->nr_pages; i++) {
3127                 struct page *page = rdata->pages[i];
3128                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3129                 size_t written;
3130
3131                 if (unlikely(iov_iter_is_pipe(iter))) {
3132                         void *addr = kmap_atomic(page);
3133
3134                         written = copy_to_iter(addr, copy, iter);
3135                         kunmap_atomic(addr);
3136                 } else
3137                         written = copy_page_to_iter(page, 0, copy, iter);
3138                 remaining -= written;
3139                 if (written < copy && iov_iter_count(iter) > 0)
3140                         break;
3141         }
3142         return remaining ? -EFAULT : 0;
3143 }
3144
3145 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3146
3147 static void
3148 cifs_uncached_readv_complete(struct work_struct *work)
3149 {
3150         struct cifs_readdata *rdata = container_of(work,
3151                                                 struct cifs_readdata, work);
3152
3153         complete(&rdata->done);
3154         collect_uncached_read_data(rdata->ctx);
3155         /* the below call can possibly free the last ref to aio ctx */
3156         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3157 }
3158
3159 static int
3160 uncached_fill_pages(struct TCP_Server_Info *server,
3161                     struct cifs_readdata *rdata, struct iov_iter *iter,
3162                     unsigned int len)
3163 {
3164         int result = 0;
3165         unsigned int i;
3166         unsigned int nr_pages = rdata->nr_pages;
3167         unsigned int page_offset = rdata->page_offset;
3168
3169         rdata->got_bytes = 0;
3170         rdata->tailsz = PAGE_SIZE;
3171         for (i = 0; i < nr_pages; i++) {
3172                 struct page *page = rdata->pages[i];
3173                 size_t n;
3174                 unsigned int segment_size = rdata->pagesz;
3175
3176                 if (i == 0)
3177                         segment_size -= page_offset;
3178                 else
3179                         page_offset = 0;
3180
3181
3182                 if (len <= 0) {
3183                         /* no need to hold page hostage */
3184                         rdata->pages[i] = NULL;
3185                         rdata->nr_pages--;
3186                         put_page(page);
3187                         continue;
3188                 }
3189
3190                 n = len;
3191                 if (len >= segment_size)
3192                         /* enough data to fill the page */
3193                         n = segment_size;
3194                 else
3195                         rdata->tailsz = len;
3196                 len -= n;
3197
3198                 if (iter)
3199                         result = copy_page_from_iter(
3200                                         page, page_offset, n, iter);
3201 #ifdef CONFIG_CIFS_SMB_DIRECT
3202                 else if (rdata->mr)
3203                         result = n;
3204 #endif
3205                 else
3206                         result = cifs_read_page_from_socket(
3207                                         server, page, page_offset, n);
3208                 if (result < 0)
3209                         break;
3210
3211                 rdata->got_bytes += result;
3212         }
3213
3214         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3215                                                 rdata->got_bytes : result;
3216 }
3217
3218 static int
3219 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3220                               struct cifs_readdata *rdata, unsigned int len)
3221 {
3222         return uncached_fill_pages(server, rdata, NULL, len);
3223 }
3224
3225 static int
3226 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3227                               struct cifs_readdata *rdata,
3228                               struct iov_iter *iter)
3229 {
3230         return uncached_fill_pages(server, rdata, iter, iter->count);
3231 }
3232
3233 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3234                         struct list_head *rdata_list,
3235                         struct cifs_aio_ctx *ctx)
3236 {
3237         int wait_retry = 0;
3238         unsigned int rsize, credits;
3239         int rc;
3240         struct TCP_Server_Info *server =
3241                 tlink_tcon(rdata->cfile->tlink)->ses->server;
3242
3243         /*
3244          * Try to resend this rdata, waiting for credits up to 3 seconds.
3245          * Note: we are attempting to resend the whole rdata not in segments
3246          */
3247         do {
3248                 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3249                                                 &rsize, &credits);
3250
3251                 if (rc)
3252                         break;
3253
3254                 if (rsize < rdata->bytes) {
3255                         add_credits_and_wake_if(server, credits, 0);
3256                         msleep(1000);
3257                         wait_retry++;
3258                 }
3259         } while (rsize < rdata->bytes && wait_retry < 3);
3260
3261         /*
3262          * If we can't find enough credits to send this rdata
3263          * release the rdata and return failure, this will pass
3264          * whatever I/O amount we have finished to VFS.
3265          */
3266         if (rsize < rdata->bytes) {
3267                 rc = -EBUSY;
3268                 goto out;
3269         }
3270
3271         rc = -EAGAIN;
3272         while (rc == -EAGAIN) {
3273                 rc = 0;
3274                 if (rdata->cfile->invalidHandle)
3275                         rc = cifs_reopen_file(rdata->cfile, true);
3276                 if (!rc)
3277                         rc = server->ops->async_readv(rdata);
3278         }
3279
3280         if (!rc) {
3281                 /* Add to aio pending list */
3282                 list_add_tail(&rdata->list, rdata_list);
3283                 return 0;
3284         }
3285
3286         add_credits_and_wake_if(server, rdata->credits, 0);
3287 out:
3288         kref_put(&rdata->refcount,
3289                 cifs_uncached_readdata_release);
3290
3291         return rc;
3292 }
3293
3294 static int
3295 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3296                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3297                      struct cifs_aio_ctx *ctx)
3298 {
3299         struct cifs_readdata *rdata;
3300         unsigned int npages, rsize, credits;
3301         size_t cur_len;
3302         int rc;
3303         pid_t pid;
3304         struct TCP_Server_Info *server;
3305         struct page **pagevec;
3306         size_t start;
3307         struct iov_iter direct_iov = ctx->iter;
3308
3309         server = tlink_tcon(open_file->tlink)->ses->server;
3310
3311         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3312                 pid = open_file->pid;
3313         else
3314                 pid = current->tgid;
3315
3316         if (ctx->direct_io)
3317                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3318
3319         do {
3320                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3321                                                    &rsize, &credits);
3322                 if (rc)
3323                         break;
3324
3325                 cur_len = min_t(const size_t, len, rsize);
3326
3327                 if (ctx->direct_io) {
3328                         ssize_t result;
3329
3330                         result = iov_iter_get_pages_alloc(
3331                                         &direct_iov, &pagevec,
3332                                         cur_len, &start);
3333                         if (result < 0) {
3334                                 cifs_dbg(VFS,
3335                                         "couldn't get user pages (cur_len=%zd)"
3336                                         " iter type %d"
3337                                         " iov_offset %zd count %zd\n",
3338                                         result, direct_iov.type,
3339                                         direct_iov.iov_offset,
3340                                         direct_iov.count);
3341                                 dump_stack();
3342                                 break;
3343                         }
3344                         cur_len = (size_t)result;
3345                         iov_iter_advance(&direct_iov, cur_len);
3346
3347                         rdata = cifs_readdata_direct_alloc(
3348                                         pagevec, cifs_uncached_readv_complete);
3349                         if (!rdata) {
3350                                 add_credits_and_wake_if(server, credits, 0);
3351                                 rc = -ENOMEM;
3352                                 break;
3353                         }
3354
3355                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3356                         rdata->page_offset = start;
3357                         rdata->tailsz = npages > 1 ?
3358                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3359                                 cur_len;
3360
3361                 } else {
3362
3363                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3364                         /* allocate a readdata struct */
3365                         rdata = cifs_readdata_alloc(npages,
3366                                             cifs_uncached_readv_complete);
3367                         if (!rdata) {
3368                                 add_credits_and_wake_if(server, credits, 0);
3369                                 rc = -ENOMEM;
3370                                 break;
3371                         }
3372
3373                         rc = cifs_read_allocate_pages(rdata, npages);
3374                         if (rc)
3375                                 goto error;
3376
3377                         rdata->tailsz = PAGE_SIZE;
3378                 }
3379
3380                 rdata->cfile = cifsFileInfo_get(open_file);
3381                 rdata->nr_pages = npages;
3382                 rdata->offset = offset;
3383                 rdata->bytes = cur_len;
3384                 rdata->pid = pid;
3385                 rdata->pagesz = PAGE_SIZE;
3386                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3387                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3388                 rdata->credits = credits;
3389                 rdata->ctx = ctx;
3390                 kref_get(&ctx->refcount);
3391
3392                 if (!rdata->cfile->invalidHandle ||
3393                     !(rc = cifs_reopen_file(rdata->cfile, true)))
3394                         rc = server->ops->async_readv(rdata);
3395 error:
3396                 if (rc) {
3397                         add_credits_and_wake_if(server, rdata->credits, 0);
3398                         kref_put(&rdata->refcount,
3399                                 cifs_uncached_readdata_release);
3400                         if (rc == -EAGAIN) {
3401                                 iov_iter_revert(&direct_iov, cur_len);
3402                                 continue;
3403                         }
3404                         break;
3405                 }
3406
3407                 list_add_tail(&rdata->list, rdata_list);
3408                 offset += cur_len;
3409                 len -= cur_len;
3410         } while (len > 0);
3411
3412         return rc;
3413 }
3414
3415 static void
3416 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3417 {
3418         struct cifs_readdata *rdata, *tmp;
3419         struct iov_iter *to = &ctx->iter;
3420         struct cifs_sb_info *cifs_sb;
3421         struct cifs_tcon *tcon;
3422         unsigned int i;
3423         int rc;
3424
3425         tcon = tlink_tcon(ctx->cfile->tlink);
3426         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3427
3428         mutex_lock(&ctx->aio_mutex);
3429
3430         if (list_empty(&ctx->list)) {
3431                 mutex_unlock(&ctx->aio_mutex);
3432                 return;
3433         }
3434
3435         rc = ctx->rc;
3436         /* the loop below should proceed in the order of increasing offsets */
3437 again:
3438         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3439                 if (!rc) {
3440                         if (!try_wait_for_completion(&rdata->done)) {
3441                                 mutex_unlock(&ctx->aio_mutex);
3442                                 return;
3443                         }
3444
3445                         if (rdata->result == -EAGAIN) {
3446                                 /* resend call if it's a retryable error */
3447                                 struct list_head tmp_list;
3448                                 unsigned int got_bytes = rdata->got_bytes;
3449
3450                                 list_del_init(&rdata->list);
3451                                 INIT_LIST_HEAD(&tmp_list);
3452
3453                                 /*
3454                                  * Got a part of data and then reconnect has
3455                                  * happened -- fill the buffer and continue
3456                                  * reading.
3457                                  */
3458                                 if (got_bytes && got_bytes < rdata->bytes) {
3459                                         rc = 0;
3460                                         if (!ctx->direct_io)
3461                                                 rc = cifs_readdata_to_iov(rdata, to);
3462                                         if (rc) {
3463                                                 kref_put(&rdata->refcount,
3464                                                         cifs_uncached_readdata_release);
3465                                                 continue;
3466                                         }
3467                                 }
3468
3469                                 if (ctx->direct_io) {
3470                                         /*
3471                                          * Re-use rdata as this is a
3472                                          * direct I/O
3473                                          */
3474                                         rc = cifs_resend_rdata(
3475                                                 rdata,
3476                                                 &tmp_list, ctx);
3477                                 } else {
3478                                         rc = cifs_send_async_read(
3479                                                 rdata->offset + got_bytes,
3480                                                 rdata->bytes - got_bytes,
3481                                                 rdata->cfile, cifs_sb,
3482                                                 &tmp_list, ctx);
3483
3484                                         kref_put(&rdata->refcount,
3485                                                 cifs_uncached_readdata_release);
3486                                 }
3487
3488                                 list_splice(&tmp_list, &ctx->list);
3489
3490                                 goto again;
3491                         } else if (rdata->result)
3492                                 rc = rdata->result;
3493                         else if (!ctx->direct_io)
3494                                 rc = cifs_readdata_to_iov(rdata, to);
3495
3496                         /* if there was a short read -- discard anything left */
3497                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3498                                 rc = -ENODATA;
3499
3500                         ctx->total_len += rdata->got_bytes;
3501                 }
3502                 list_del_init(&rdata->list);
3503                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3504         }
3505
3506         if (!ctx->direct_io) {
3507                 for (i = 0; i < ctx->npages; i++) {
3508                         if (ctx->should_dirty)
3509                                 set_page_dirty(ctx->bv[i].bv_page);
3510                         put_page(ctx->bv[i].bv_page);
3511                 }
3512
3513                 ctx->total_len = ctx->len - iov_iter_count(to);
3514         }
3515
3516         cifs_stats_bytes_read(tcon, ctx->total_len);
3517
3518         /* mask nodata case */
3519         if (rc == -ENODATA)
3520                 rc = 0;
3521
3522         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3523
3524         mutex_unlock(&ctx->aio_mutex);
3525
3526         if (ctx->iocb && ctx->iocb->ki_complete)
3527                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3528         else
3529                 complete(&ctx->done);
3530 }
3531
3532 static ssize_t __cifs_readv(
3533         struct kiocb *iocb, struct iov_iter *to, bool direct)
3534 {
3535         size_t len;
3536         struct file *file = iocb->ki_filp;
3537         struct cifs_sb_info *cifs_sb;
3538         struct cifsFileInfo *cfile;
3539         struct cifs_tcon *tcon;
3540         ssize_t rc, total_read = 0;
3541         loff_t offset = iocb->ki_pos;
3542         struct cifs_aio_ctx *ctx;
3543
3544         /*
3545          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3546          * fall back to data copy read path
3547          * this could be improved by getting pages directly in ITER_KVEC
3548          */
3549         if (direct && to->type & ITER_KVEC) {
3550                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3551                 direct = false;
3552         }
3553
3554         len = iov_iter_count(to);
3555         if (!len)
3556                 return 0;
3557
3558         cifs_sb = CIFS_FILE_SB(file);
3559         cfile = file->private_data;
3560         tcon = tlink_tcon(cfile->tlink);
3561
3562         if (!tcon->ses->server->ops->async_readv)
3563                 return -ENOSYS;
3564
3565         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3566                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3567
3568         ctx = cifs_aio_ctx_alloc();
3569         if (!ctx)
3570                 return -ENOMEM;
3571
3572         ctx->cfile = cifsFileInfo_get(cfile);
3573
3574         if (!is_sync_kiocb(iocb))
3575                 ctx->iocb = iocb;
3576
3577         if (iter_is_iovec(to))
3578                 ctx->should_dirty = true;
3579
3580         if (direct) {
3581                 ctx->pos = offset;
3582                 ctx->direct_io = true;
3583                 ctx->iter = *to;
3584                 ctx->len = len;
3585         } else {
3586                 rc = setup_aio_ctx_iter(ctx, to, READ);
3587                 if (rc) {
3588                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3589                         return rc;
3590                 }
3591                 len = ctx->len;
3592         }
3593
3594         /* grab a lock here due to read response handlers can access ctx */
3595         mutex_lock(&ctx->aio_mutex);
3596
3597         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3598
3599         /* if at least one read request send succeeded, then reset rc */
3600         if (!list_empty(&ctx->list))
3601                 rc = 0;
3602
3603         mutex_unlock(&ctx->aio_mutex);
3604
3605         if (rc) {
3606                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3607                 return rc;
3608         }
3609
3610         if (!is_sync_kiocb(iocb)) {
3611                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3612                 return -EIOCBQUEUED;
3613         }
3614
3615         rc = wait_for_completion_killable(&ctx->done);
3616         if (rc) {
3617                 mutex_lock(&ctx->aio_mutex);
3618                 ctx->rc = rc = -EINTR;
3619                 total_read = ctx->total_len;
3620                 mutex_unlock(&ctx->aio_mutex);
3621         } else {
3622                 rc = ctx->rc;
3623                 total_read = ctx->total_len;
3624         }
3625
3626         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3627
3628         if (total_read) {
3629                 iocb->ki_pos += total_read;
3630                 return total_read;
3631         }
3632         return rc;
3633 }
3634
3635 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3636 {
3637         return __cifs_readv(iocb, to, true);
3638 }
3639
3640 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3641 {
3642         return __cifs_readv(iocb, to, false);
3643 }
3644
3645 ssize_t
3646 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3647 {
3648         struct inode *inode = file_inode(iocb->ki_filp);
3649         struct cifsInodeInfo *cinode = CIFS_I(inode);
3650         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3651         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3652                                                 iocb->ki_filp->private_data;
3653         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3654         int rc = -EACCES;
3655
3656         /*
3657          * In strict cache mode we need to read from the server all the time
3658          * if we don't have level II oplock because the server can delay mtime
3659          * change - so we can't make a decision about inode invalidating.
3660          * And we can also fail with pagereading if there are mandatory locks
3661          * on pages affected by this read but not on the region from pos to
3662          * pos+len-1.
3663          */
3664         if (!CIFS_CACHE_READ(cinode))
3665                 return cifs_user_readv(iocb, to);
3666
3667         if (cap_unix(tcon->ses) &&
3668             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3669             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3670                 return generic_file_read_iter(iocb, to);
3671
3672         /*
3673          * We need to hold the sem to be sure nobody modifies lock list
3674          * with a brlock that prevents reading.
3675          */
3676         down_read(&cinode->lock_sem);
3677         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3678                                      tcon->ses->server->vals->shared_lock_type,
3679                                      0, NULL, CIFS_READ_OP))
3680                 rc = generic_file_read_iter(iocb, to);
3681         up_read(&cinode->lock_sem);
3682         return rc;
3683 }
3684
3685 static ssize_t
3686 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3687 {
3688         int rc = -EACCES;
3689         unsigned int bytes_read = 0;
3690         unsigned int total_read;
3691         unsigned int current_read_size;
3692         unsigned int rsize;
3693         struct cifs_sb_info *cifs_sb;
3694         struct cifs_tcon *tcon;
3695         struct TCP_Server_Info *server;
3696         unsigned int xid;
3697         char *cur_offset;
3698         struct cifsFileInfo *open_file;
3699         struct cifs_io_parms io_parms;
3700         int buf_type = CIFS_NO_BUFFER;
3701         __u32 pid;
3702
3703         xid = get_xid();
3704         cifs_sb = CIFS_FILE_SB(file);
3705
3706         /* FIXME: set up handlers for larger reads and/or convert to async */
3707         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3708
3709         if (file->private_data == NULL) {
3710                 rc = -EBADF;
3711                 free_xid(xid);
3712                 return rc;
3713         }
3714         open_file = file->private_data;
3715         tcon = tlink_tcon(open_file->tlink);
3716         server = tcon->ses->server;
3717
3718         if (!server->ops->sync_read) {
3719                 free_xid(xid);
3720                 return -ENOSYS;
3721         }
3722
3723         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3724                 pid = open_file->pid;
3725         else
3726                 pid = current->tgid;
3727
3728         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3729                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3730
3731         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3732              total_read += bytes_read, cur_offset += bytes_read) {
3733                 do {
3734                         current_read_size = min_t(uint, read_size - total_read,
3735                                                   rsize);
3736                         /*
3737                          * For windows me and 9x we do not want to request more
3738                          * than it negotiated since it will refuse the read
3739                          * then.
3740                          */
3741                         if ((tcon->ses) && !(tcon->ses->capabilities &
3742                                 tcon->ses->server->vals->cap_large_files)) {
3743                                 current_read_size = min_t(uint,
3744                                         current_read_size, CIFSMaxBufSize);
3745                         }
3746                         if (open_file->invalidHandle) {
3747                                 rc = cifs_reopen_file(open_file, true);
3748                                 if (rc != 0)
3749                                         break;
3750                         }
3751                         io_parms.pid = pid;
3752                         io_parms.tcon = tcon;
3753                         io_parms.offset = *offset;
3754                         io_parms.length = current_read_size;
3755                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3756                                                     &bytes_read, &cur_offset,
3757                                                     &buf_type);
3758                 } while (rc == -EAGAIN);
3759
3760                 if (rc || (bytes_read == 0)) {
3761                         if (total_read) {
3762                                 break;
3763                         } else {
3764                                 free_xid(xid);
3765                                 return rc;
3766                         }
3767                 } else {
3768                         cifs_stats_bytes_read(tcon, total_read);
3769                         *offset += bytes_read;
3770                 }
3771         }
3772         free_xid(xid);
3773         return total_read;
3774 }
3775
3776 /*
3777  * If the page is mmap'ed into a process' page tables, then we need to make
3778  * sure that it doesn't change while being written back.
3779  */
3780 static vm_fault_t
3781 cifs_page_mkwrite(struct vm_fault *vmf)
3782 {
3783         struct page *page = vmf->page;
3784
3785         lock_page(page);
3786         return VM_FAULT_LOCKED;
3787 }
3788
3789 static const struct vm_operations_struct cifs_file_vm_ops = {
3790         .fault = filemap_fault,
3791         .map_pages = filemap_map_pages,
3792         .page_mkwrite = cifs_page_mkwrite,
3793 };
3794
3795 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3796 {
3797         int xid, rc = 0;
3798         struct inode *inode = file_inode(file);
3799
3800         xid = get_xid();
3801
3802         if (!CIFS_CACHE_READ(CIFS_I(inode)))
3803                 rc = cifs_zap_mapping(inode);
3804         if (!rc)
3805                 rc = generic_file_mmap(file, vma);
3806         if (!rc)
3807                 vma->vm_ops = &cifs_file_vm_ops;
3808
3809         free_xid(xid);
3810         return rc;
3811 }
3812
3813 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3814 {
3815         int rc, xid;
3816
3817         xid = get_xid();
3818
3819         rc = cifs_revalidate_file(file);
3820         if (rc)
3821                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3822                          rc);
3823         if (!rc)
3824                 rc = generic_file_mmap(file, vma);
3825         if (!rc)
3826                 vma->vm_ops = &cifs_file_vm_ops;
3827
3828         free_xid(xid);
3829         return rc;
3830 }
3831
3832 static void
3833 cifs_readv_complete(struct work_struct *work)
3834 {
3835         unsigned int i, got_bytes;
3836         struct cifs_readdata *rdata = container_of(work,
3837                                                 struct cifs_readdata, work);
3838
3839         got_bytes = rdata->got_bytes;
3840         for (i = 0; i < rdata->nr_pages; i++) {
3841                 struct page *page = rdata->pages[i];
3842
3843                 lru_cache_add_file(page);
3844
3845                 if (rdata->result == 0 ||
3846                     (rdata->result == -EAGAIN && got_bytes)) {
3847                         flush_dcache_page(page);
3848                         SetPageUptodate(page);
3849                 }
3850
3851                 unlock_page(page);
3852
3853                 if (rdata->result == 0 ||
3854                     (rdata->result == -EAGAIN && got_bytes))
3855                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3856
3857                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3858
3859                 put_page(page);
3860                 rdata->pages[i] = NULL;
3861         }
3862         kref_put(&rdata->refcount, cifs_readdata_release);
3863 }
3864
3865 static int
3866 readpages_fill_pages(struct TCP_Server_Info *server,
3867                      struct cifs_readdata *rdata, struct iov_iter *iter,
3868                      unsigned int len)
3869 {
3870         int result = 0;
3871         unsigned int i;
3872         u64 eof;
3873         pgoff_t eof_index;
3874         unsigned int nr_pages = rdata->nr_pages;
3875         unsigned int page_offset = rdata->page_offset;
3876
3877         /* determine the eof that the server (probably) has */
3878         eof = CIFS_I(rdata->mapping->host)->server_eof;
3879         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3880         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3881
3882         rdata->got_bytes = 0;
3883         rdata->tailsz = PAGE_SIZE;
3884         for (i = 0; i < nr_pages; i++) {
3885                 struct page *page = rdata->pages[i];
3886                 unsigned int to_read = rdata->pagesz;
3887                 size_t n;
3888
3889                 if (i == 0)
3890                         to_read -= page_offset;
3891                 else
3892                         page_offset = 0;
3893
3894                 n = to_read;
3895
3896                 if (len >= to_read) {
3897                         len -= to_read;
3898                 } else if (len > 0) {
3899                         /* enough for partial page, fill and zero the rest */
3900                         zero_user(page, len + page_offset, to_read - len);
3901                         n = rdata->tailsz = len;
3902                         len = 0;
3903                 } else if (page->index > eof_index) {
3904                         /*
3905                          * The VFS will not try to do readahead past the
3906                          * i_size, but it's possible that we have outstanding
3907                          * writes with gaps in the middle and the i_size hasn't
3908                          * caught up yet. Populate those with zeroed out pages
3909                          * to prevent the VFS from repeatedly attempting to
3910                          * fill them until the writes are flushed.
3911                          */
3912                         zero_user(page, 0, PAGE_SIZE);
3913                         lru_cache_add_file(page);
3914                         flush_dcache_page(page);
3915                         SetPageUptodate(page);
3916                         unlock_page(page);
3917                         put_page(page);
3918                         rdata->pages[i] = NULL;
3919                         rdata->nr_pages--;
3920                         continue;
3921                 } else {
3922                         /* no need to hold page hostage */
3923                         lru_cache_add_file(page);
3924                         unlock_page(page);
3925                         put_page(page);
3926                         rdata->pages[i] = NULL;
3927                         rdata->nr_pages--;
3928                         continue;
3929                 }
3930
3931                 if (iter)
3932                         result = copy_page_from_iter(
3933                                         page, page_offset, n, iter);
3934 #ifdef CONFIG_CIFS_SMB_DIRECT
3935                 else if (rdata->mr)
3936                         result = n;
3937 #endif
3938                 else
3939                         result = cifs_read_page_from_socket(
3940                                         server, page, page_offset, n);
3941                 if (result < 0)
3942                         break;
3943
3944                 rdata->got_bytes += result;
3945         }
3946
3947         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3948                                                 rdata->got_bytes : result;
3949 }
3950
3951 static int
3952 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3953                                struct cifs_readdata *rdata, unsigned int len)
3954 {
3955         return readpages_fill_pages(server, rdata, NULL, len);
3956 }
3957
3958 static int
3959 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
3960                                struct cifs_readdata *rdata,
3961                                struct iov_iter *iter)
3962 {
3963         return readpages_fill_pages(server, rdata, iter, iter->count);
3964 }
3965
3966 static int
3967 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3968                     unsigned int rsize, struct list_head *tmplist,
3969                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3970 {
3971         struct page *page, *tpage;
3972         unsigned int expected_index;
3973         int rc;
3974         gfp_t gfp = readahead_gfp_mask(mapping);
3975
3976         INIT_LIST_HEAD(tmplist);
3977
3978         page = list_entry(page_list->prev, struct page, lru);
3979
3980         /*
3981          * Lock the page and put it in the cache. Since no one else
3982          * should have access to this page, we're safe to simply set
3983          * PG_locked without checking it first.
3984          */
3985         __SetPageLocked(page);
3986         rc = add_to_page_cache_locked(page, mapping,
3987                                       page->index, gfp);
3988
3989         /* give up if we can't stick it in the cache */
3990         if (rc) {
3991                 __ClearPageLocked(page);
3992                 return rc;
3993         }
3994
3995         /* move first page to the tmplist */
3996         *offset = (loff_t)page->index << PAGE_SHIFT;
3997         *bytes = PAGE_SIZE;
3998         *nr_pages = 1;
3999         list_move_tail(&page->lru, tmplist);
4000
4001         /* now try and add more pages onto the request */
4002         expected_index = page->index + 1;
4003         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4004                 /* discontinuity ? */
4005                 if (page->index != expected_index)
4006                         break;
4007
4008                 /* would this page push the read over the rsize? */
4009                 if (*bytes + PAGE_SIZE > rsize)
4010                         break;
4011
4012                 __SetPageLocked(page);
4013                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4014                         __ClearPageLocked(page);
4015                         break;
4016                 }
4017                 list_move_tail(&page->lru, tmplist);
4018                 (*bytes) += PAGE_SIZE;
4019                 expected_index++;
4020                 (*nr_pages)++;
4021         }
4022         return rc;
4023 }
4024
4025 static int cifs_readpages(struct file *file, struct address_space *mapping,
4026         struct list_head *page_list, unsigned num_pages)
4027 {
4028         int rc;
4029         struct list_head tmplist;
4030         struct cifsFileInfo *open_file = file->private_data;
4031         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4032         struct TCP_Server_Info *server;
4033         pid_t pid;
4034         unsigned int xid;
4035
4036         xid = get_xid();
4037         /*
4038          * Reads as many pages as possible from fscache. Returns -ENOBUFS
4039          * immediately if the cookie is negative
4040          *
4041          * After this point, every page in the list might have PG_fscache set,
4042          * so we will need to clean that up off of every page we don't use.
4043          */
4044         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4045                                          &num_pages);
4046         if (rc == 0) {
4047                 free_xid(xid);
4048                 return rc;
4049         }
4050
4051         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4052                 pid = open_file->pid;
4053         else
4054                 pid = current->tgid;
4055
4056         rc = 0;
4057         server = tlink_tcon(open_file->tlink)->ses->server;
4058
4059         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4060                  __func__, file, mapping, num_pages);
4061
4062         /*
4063          * Start with the page at end of list and move it to private
4064          * list. Do the same with any following pages until we hit
4065          * the rsize limit, hit an index discontinuity, or run out of
4066          * pages. Issue the async read and then start the loop again
4067          * until the list is empty.
4068          *
4069          * Note that list order is important. The page_list is in
4070          * the order of declining indexes. When we put the pages in
4071          * the rdata->pages, then we want them in increasing order.
4072          */
4073         while (!list_empty(page_list)) {
4074                 unsigned int i, nr_pages, bytes, rsize;
4075                 loff_t offset;
4076                 struct page *page, *tpage;
4077                 struct cifs_readdata *rdata;
4078                 unsigned credits;
4079
4080                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4081                                                    &rsize, &credits);
4082                 if (rc)
4083                         break;
4084
4085                 /*
4086                  * Give up immediately if rsize is too small to read an entire
4087                  * page. The VFS will fall back to readpage. We should never
4088                  * reach this point however since we set ra_pages to 0 when the
4089                  * rsize is smaller than a cache page.
4090                  */
4091                 if (unlikely(rsize < PAGE_SIZE)) {
4092                         add_credits_and_wake_if(server, credits, 0);
4093                         free_xid(xid);
4094                         return 0;
4095                 }
4096
4097                 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4098                                          &nr_pages, &offset, &bytes);
4099                 if (rc) {
4100                         add_credits_and_wake_if(server, credits, 0);
4101                         break;
4102                 }
4103
4104                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4105                 if (!rdata) {
4106                         /* best to give up if we're out of mem */
4107                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4108                                 list_del(&page->lru);
4109                                 lru_cache_add_file(page);
4110                                 unlock_page(page);
4111                                 put_page(page);
4112                         }
4113                         rc = -ENOMEM;
4114                         add_credits_and_wake_if(server, credits, 0);
4115                         break;
4116                 }
4117
4118                 rdata->cfile = cifsFileInfo_get(open_file);
4119                 rdata->mapping = mapping;
4120                 rdata->offset = offset;
4121                 rdata->bytes = bytes;
4122                 rdata->pid = pid;
4123                 rdata->pagesz = PAGE_SIZE;
4124                 rdata->tailsz = PAGE_SIZE;
4125                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4126                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4127                 rdata->credits = credits;
4128
4129                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4130                         list_del(&page->lru);
4131                         rdata->pages[rdata->nr_pages++] = page;
4132                 }
4133
4134                 if (!rdata->cfile->invalidHandle ||
4135                     !(rc = cifs_reopen_file(rdata->cfile, true)))
4136                         rc = server->ops->async_readv(rdata);
4137                 if (rc) {
4138                         add_credits_and_wake_if(server, rdata->credits, 0);
4139                         for (i = 0; i < rdata->nr_pages; i++) {
4140                                 page = rdata->pages[i];
4141                                 lru_cache_add_file(page);
4142                                 unlock_page(page);
4143                                 put_page(page);
4144                         }
4145                         /* Fallback to the readpage in error/reconnect cases */
4146                         kref_put(&rdata->refcount, cifs_readdata_release);
4147                         break;
4148                 }
4149
4150                 kref_put(&rdata->refcount, cifs_readdata_release);
4151         }
4152
4153         /* Any pages that have been shown to fscache but didn't get added to
4154          * the pagecache must be uncached before they get returned to the
4155          * allocator.
4156          */
4157         cifs_fscache_readpages_cancel(mapping->host, page_list);
4158         free_xid(xid);
4159         return rc;
4160 }
4161
4162 /*
4163  * cifs_readpage_worker must be called with the page pinned
4164  */
4165 static int cifs_readpage_worker(struct file *file, struct page *page,
4166         loff_t *poffset)
4167 {
4168         char *read_data;
4169         int rc;
4170
4171         /* Is the page cached? */
4172         rc = cifs_readpage_from_fscache(file_inode(file), page);
4173         if (rc == 0)
4174                 goto read_complete;
4175
4176         read_data = kmap(page);
4177         /* for reads over a certain size could initiate async read ahead */
4178
4179         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4180
4181         if (rc < 0)
4182                 goto io_error;
4183         else
4184                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4185
4186         /* we do not want atime to be less than mtime, it broke some apps */
4187         file_inode(file)->i_atime = current_time(file_inode(file));
4188         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4189                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4190         else
4191                 file_inode(file)->i_atime = current_time(file_inode(file));
4192
4193         if (PAGE_SIZE > rc)
4194                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4195
4196         flush_dcache_page(page);
4197         SetPageUptodate(page);
4198
4199         /* send this page to the cache */
4200         cifs_readpage_to_fscache(file_inode(file), page);
4201
4202         rc = 0;
4203
4204 io_error:
4205         kunmap(page);
4206         unlock_page(page);
4207
4208 read_complete:
4209         return rc;
4210 }
4211
4212 static int cifs_readpage(struct file *file, struct page *page)
4213 {
4214         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4215         int rc = -EACCES;
4216         unsigned int xid;
4217
4218         xid = get_xid();
4219
4220         if (file->private_data == NULL) {
4221                 rc = -EBADF;
4222                 free_xid(xid);
4223                 return rc;
4224         }
4225
4226         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4227                  page, (int)offset, (int)offset);
4228
4229         rc = cifs_readpage_worker(file, page, &offset);
4230
4231         free_xid(xid);
4232         return rc;
4233 }
4234
4235 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4236 {
4237         struct cifsFileInfo *open_file;
4238         struct cifs_tcon *tcon =
4239                 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
4240
4241         spin_lock(&tcon->open_file_lock);
4242         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4243                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4244                         spin_unlock(&tcon->open_file_lock);
4245                         return 1;
4246                 }
4247         }
4248         spin_unlock(&tcon->open_file_lock);
4249         return 0;
4250 }
4251
4252 /* We do not want to update the file size from server for inodes
4253    open for write - to avoid races with writepage extending
4254    the file - in the future we could consider allowing
4255    refreshing the inode only on increases in the file size
4256    but this is tricky to do without racing with writebehind
4257    page caching in the current Linux kernel design */
4258 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4259 {
4260         if (!cifsInode)
4261                 return true;
4262
4263         if (is_inode_writable(cifsInode)) {
4264                 /* This inode is open for write at least once */
4265                 struct cifs_sb_info *cifs_sb;
4266
4267                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4268                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4269                         /* since no page cache to corrupt on directio
4270                         we can change size safely */
4271                         return true;
4272                 }
4273
4274                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4275                         return true;
4276
4277                 return false;
4278         } else
4279                 return true;
4280 }
4281
4282 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4283                         loff_t pos, unsigned len, unsigned flags,
4284                         struct page **pagep, void **fsdata)
4285 {
4286         int oncethru = 0;
4287         pgoff_t index = pos >> PAGE_SHIFT;
4288         loff_t offset = pos & (PAGE_SIZE - 1);
4289         loff_t page_start = pos & PAGE_MASK;
4290         loff_t i_size;
4291         struct page *page;
4292         int rc = 0;
4293
4294         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4295
4296 start:
4297         page = grab_cache_page_write_begin(mapping, index, flags);
4298         if (!page) {
4299                 rc = -ENOMEM;
4300                 goto out;
4301         }
4302
4303         if (PageUptodate(page))
4304                 goto out;
4305
4306         /*
4307          * If we write a full page it will be up to date, no need to read from
4308          * the server. If the write is short, we'll end up doing a sync write
4309          * instead.
4310          */
4311         if (len == PAGE_SIZE)
4312                 goto out;
4313
4314         /*
4315          * optimize away the read when we have an oplock, and we're not
4316          * expecting to use any of the data we'd be reading in. That
4317          * is, when the page lies beyond the EOF, or straddles the EOF
4318          * and the write will cover all of the existing data.
4319          */
4320         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4321                 i_size = i_size_read(mapping->host);
4322                 if (page_start >= i_size ||
4323                     (offset == 0 && (pos + len) >= i_size)) {
4324                         zero_user_segments(page, 0, offset,
4325                                            offset + len,
4326                                            PAGE_SIZE);
4327                         /*
4328                          * PageChecked means that the parts of the page
4329                          * to which we're not writing are considered up
4330                          * to date. Once the data is copied to the
4331                          * page, it can be set uptodate.
4332                          */
4333                         SetPageChecked(page);
4334                         goto out;
4335                 }
4336         }
4337
4338         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4339                 /*
4340                  * might as well read a page, it is fast enough. If we get
4341                  * an error, we don't need to return it. cifs_write_end will
4342                  * do a sync write instead since PG_uptodate isn't set.
4343                  */
4344                 cifs_readpage_worker(file, page, &page_start);
4345                 put_page(page);
4346                 oncethru = 1;
4347                 goto start;
4348         } else {
4349                 /* we could try using another file handle if there is one -
4350                    but how would we lock it to prevent close of that handle
4351                    racing with this read? In any case
4352                    this will be written out by write_end so is fine */
4353         }
4354 out:
4355         *pagep = page;
4356         return rc;
4357 }
4358
4359 static int cifs_release_page(struct page *page, gfp_t gfp)
4360 {
4361         if (PagePrivate(page))
4362                 return 0;
4363
4364         return cifs_fscache_release_page(page, gfp);
4365 }
4366
4367 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4368                                  unsigned int length)
4369 {
4370         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4371
4372         if (offset == 0 && length == PAGE_SIZE)
4373                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4374 }
4375
4376 static int cifs_launder_page(struct page *page)
4377 {
4378         int rc = 0;
4379         loff_t range_start = page_offset(page);
4380         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4381         struct writeback_control wbc = {
4382                 .sync_mode = WB_SYNC_ALL,
4383                 .nr_to_write = 0,
4384                 .range_start = range_start,
4385                 .range_end = range_end,
4386         };
4387
4388         cifs_dbg(FYI, "Launder page: %p\n", page);
4389
4390         if (clear_page_dirty_for_io(page))
4391                 rc = cifs_writepage_locked(page, &wbc);
4392
4393         cifs_fscache_invalidate_page(page, page->mapping->host);
4394         return rc;
4395 }
4396
4397 void cifs_oplock_break(struct work_struct *work)
4398 {
4399         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4400                                                   oplock_break);
4401         struct inode *inode = d_inode(cfile->dentry);
4402         struct cifsInodeInfo *cinode = CIFS_I(inode);
4403         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4404         struct TCP_Server_Info *server = tcon->ses->server;
4405         int rc = 0;
4406
4407         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4408                         TASK_UNINTERRUPTIBLE);
4409
4410         server->ops->downgrade_oplock(server, cinode,
4411                 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
4412
4413         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4414                                                 cifs_has_mand_locks(cinode)) {
4415                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4416                          inode);
4417                 cinode->oplock = 0;
4418         }
4419
4420         if (inode && S_ISREG(inode->i_mode)) {
4421                 if (CIFS_CACHE_READ(cinode))
4422                         break_lease(inode, O_RDONLY);
4423                 else
4424                         break_lease(inode, O_WRONLY);
4425                 rc = filemap_fdatawrite(inode->i_mapping);
4426                 if (!CIFS_CACHE_READ(cinode)) {
4427                         rc = filemap_fdatawait(inode->i_mapping);
4428                         mapping_set_error(inode->i_mapping, rc);
4429                         cifs_zap_mapping(inode);
4430                 }
4431                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4432         }
4433
4434         rc = cifs_push_locks(cfile);
4435         if (rc)
4436                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4437
4438         /*
4439          * releasing stale oplock after recent reconnect of smb session using
4440          * a now incorrect file handle is not a data integrity issue but do
4441          * not bother sending an oplock release if session to server still is
4442          * disconnected since oplock already released by the server
4443          */
4444         if (!cfile->oplock_break_cancelled) {
4445                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4446                                                              cinode);
4447                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4448         }
4449         cifs_done_oplock_break(cinode);
4450 }
4451
4452 /*
4453  * The presence of cifs_direct_io() in the address space ops vector
4454  * allowes open() O_DIRECT flags which would have failed otherwise.
4455  *
4456  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4457  * so this method should never be called.
4458  *
4459  * Direct IO is not yet supported in the cached mode. 
4460  */
4461 static ssize_t
4462 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4463 {
4464         /*
4465          * FIXME
4466          * Eventually need to support direct IO for non forcedirectio mounts
4467          */
4468         return -EINVAL;
4469 }
4470
4471
4472 const struct address_space_operations cifs_addr_ops = {
4473         .readpage = cifs_readpage,
4474         .readpages = cifs_readpages,
4475         .writepage = cifs_writepage,
4476         .writepages = cifs_writepages,
4477         .write_begin = cifs_write_begin,
4478         .write_end = cifs_write_end,
4479         .set_page_dirty = __set_page_dirty_nobuffers,
4480         .releasepage = cifs_release_page,
4481         .direct_IO = cifs_direct_io,
4482         .invalidatepage = cifs_invalidate_page,
4483         .launder_page = cifs_launder_page,
4484 };
4485
4486 /*
4487  * cifs_readpages requires the server to support a buffer large enough to
4488  * contain the header plus one complete page of data.  Otherwise, we need
4489  * to leave cifs_readpages out of the address space operations.
4490  */
4491 const struct address_space_operations cifs_addr_ops_smallbuf = {
4492         .readpage = cifs_readpage,
4493         .writepage = cifs_writepage,
4494         .writepages = cifs_writepages,
4495         .write_begin = cifs_write_begin,
4496         .write_end = cifs_write_end,
4497         .set_page_dirty = __set_page_dirty_nobuffers,
4498         .releasepage = cifs_release_page,
4499         .invalidatepage = cifs_invalidate_page,
4500         .launder_page = cifs_launder_page,
4501 };
This page took 0.293042 seconds and 4 git commands to generate.