[linux.git] / fs / cifs / file.c
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French ([email protected])
8  *              Jeremy Allison ([email protected])
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/backing-dev.h>
13 #include <linux/stat.h>
14 #include <linux/fcntl.h>
15 #include <linux/pagemap.h>
16 #include <linux/pagevec.h>
17 #include <linux/writeback.h>
18 #include <linux/task_io_accounting_ops.h>
19 #include <linux/delay.h>
20 #include <linux/mount.h>
21 #include <linux/slab.h>
22 #include <linux/swap.h>
23 #include <linux/mm.h>
24 #include <asm/div64.h>
25 #include "cifsfs.h"
26 #include "cifspdu.h"
27 #include "cifsglob.h"
28 #include "cifsproto.h"
29 #include "cifs_unicode.h"
30 #include "cifs_debug.h"
31 #include "cifs_fs_sb.h"
32 #include "fscache.h"
33 #include "smbdirect.h"
34 #include "fs_context.h"
35 #include "cifs_ioctl.h"
36
37 static inline int cifs_convert_flags(unsigned int flags)
38 {
39         if ((flags & O_ACCMODE) == O_RDONLY)
40                 return GENERIC_READ;
41         else if ((flags & O_ACCMODE) == O_WRONLY)
42                 return GENERIC_WRITE;
43         else if ((flags & O_ACCMODE) == O_RDWR) {
44         /* GENERIC_ALL is too much permission to request; it
45            can cause unnecessary access-denied errors on create */
46                 /* return GENERIC_ALL; */
47                 return (GENERIC_READ | GENERIC_WRITE);
48         }
49
50         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
51                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
52                 FILE_READ_DATA);
53 }
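/*
 * Example (illustrative): the mapping above yields
 *
 *   cifs_convert_flags(O_RDONLY) == GENERIC_READ
 *   cifs_convert_flags(O_WRONLY) == GENERIC_WRITE
 *   cifs_convert_flags(O_RDWR)   == (GENERIC_READ | GENERIC_WRITE)
 *
 * Any other O_ACCMODE value falls through to the explicit bitmask of
 * FILE_* rights returned at the end of the function.
 */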
54
55 static u32 cifs_posix_convert_flags(unsigned int flags)
56 {
57         u32 posix_flags = 0;
58
59         if ((flags & O_ACCMODE) == O_RDONLY)
60                 posix_flags = SMB_O_RDONLY;
61         else if ((flags & O_ACCMODE) == O_WRONLY)
62                 posix_flags = SMB_O_WRONLY;
63         else if ((flags & O_ACCMODE) == O_RDWR)
64                 posix_flags = SMB_O_RDWR;
65
66         if (flags & O_CREAT) {
67                 posix_flags |= SMB_O_CREAT;
68                 if (flags & O_EXCL)
69                         posix_flags |= SMB_O_EXCL;
70         } else if (flags & O_EXCL)
71                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
72                          current->comm, current->tgid);
73
74         if (flags & O_TRUNC)
75                 posix_flags |= SMB_O_TRUNC;
76         /* be safe and imply O_SYNC for O_DSYNC */
77         if (flags & O_DSYNC)
78                 posix_flags |= SMB_O_SYNC;
79         if (flags & O_DIRECTORY)
80                 posix_flags |= SMB_O_DIRECTORY;
81         if (flags & O_NOFOLLOW)
82                 posix_flags |= SMB_O_NOFOLLOW;
83         if (flags & O_DIRECT)
84                 posix_flags |= SMB_O_DIRECT;
85
86         return posix_flags;
87 }
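/*
 * Example (illustrative): a typical create-or-truncate open maps as
 *
 *   cifs_posix_convert_flags(O_WRONLY | O_CREAT | O_TRUNC)
 *       == (SMB_O_WRONLY | SMB_O_CREAT | SMB_O_TRUNC)
 *
 * and O_DSYNC is widened to SMB_O_SYNC, per the comment above.
 */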
88
89 static inline int cifs_get_disposition(unsigned int flags)
90 {
91         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
92                 return FILE_CREATE;
93         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
94                 return FILE_OVERWRITE_IF;
95         else if ((flags & O_CREAT) == O_CREAT)
96                 return FILE_OPEN_IF;
97         else if ((flags & O_TRUNC) == O_TRUNC)
98                 return FILE_OVERWRITE;
99         else
100                 return FILE_OPEN;
101 }
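/*
 * Example (illustrative): the ordering of the checks above gives
 *
 *   cifs_get_disposition(O_CREAT | O_EXCL)  == FILE_CREATE
 *   cifs_get_disposition(O_CREAT | O_TRUNC) == FILE_OVERWRITE_IF
 *   cifs_get_disposition(O_CREAT)           == FILE_OPEN_IF
 *   cifs_get_disposition(O_TRUNC)           == FILE_OVERWRITE
 *   cifs_get_disposition(0)                 == FILE_OPEN
 */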
102
103 int cifs_posix_open(const char *full_path, struct inode **pinode,
104                         struct super_block *sb, int mode, unsigned int f_flags,
105                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
106 {
107         int rc;
108         FILE_UNIX_BASIC_INFO *presp_data;
109         __u32 posix_flags = 0;
110         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
111         struct cifs_fattr fattr;
112         struct tcon_link *tlink;
113         struct cifs_tcon *tcon;
114
115         cifs_dbg(FYI, "posix open %s\n", full_path);
116
117         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
118         if (presp_data == NULL)
119                 return -ENOMEM;
120
121         tlink = cifs_sb_tlink(cifs_sb);
122         if (IS_ERR(tlink)) {
123                 rc = PTR_ERR(tlink);
124                 goto posix_open_ret;
125         }
126
127         tcon = tlink_tcon(tlink);
128         mode &= ~current_umask();
129
130         posix_flags = cifs_posix_convert_flags(f_flags);
131         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
132                              poplock, full_path, cifs_sb->local_nls,
133                              cifs_remap(cifs_sb));
134         cifs_put_tlink(tlink);
135
136         if (rc)
137                 goto posix_open_ret;
138
139         if (presp_data->Type == cpu_to_le32(-1))
140                 goto posix_open_ret; /* open ok, caller does qpathinfo */
141
142         if (!pinode)
143                 goto posix_open_ret; /* caller does not need info */
144
145         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
146
147         /* get new inode and set it up */
148         if (*pinode == NULL) {
149                 cifs_fill_uniqueid(sb, &fattr);
150                 *pinode = cifs_iget(sb, &fattr);
151                 if (!*pinode) {
152                         rc = -ENOMEM;
153                         goto posix_open_ret;
154                 }
155         } else {
156                 cifs_revalidate_mapping(*pinode);
157                 rc = cifs_fattr_to_inode(*pinode, &fattr);
158         }
159
160 posix_open_ret:
161         kfree(presp_data);
162         return rc;
163 }
164
165 static int
166 cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
167              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
168              struct cifs_fid *fid, unsigned int xid)
169 {
170         int rc;
171         int desired_access;
172         int disposition;
173         int create_options = CREATE_NOT_DIR;
174         FILE_ALL_INFO *buf;
175         struct TCP_Server_Info *server = tcon->ses->server;
176         struct cifs_open_parms oparms;
177
178         if (!server->ops->open)
179                 return -ENOSYS;
180
181         desired_access = cifs_convert_flags(f_flags);
182
183 /*********************************************************************
184  *  open flag mapping table:
185  *
186  *      POSIX Flag            CIFS Disposition
187  *      ----------            ----------------
188  *      O_CREAT               FILE_OPEN_IF
189  *      O_CREAT | O_EXCL      FILE_CREATE
190  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
191  *      O_TRUNC               FILE_OVERWRITE
192  *      none of the above     FILE_OPEN
193  *
194  *      Note that there is no direct match for the disposition
195  *      FILE_SUPERSEDE (ie create whether or not the file exists);
196  *      O_CREAT | O_TRUNC is similar, but truncates an existing
197  *      file rather than creating a new one as FILE_SUPERSEDE does
198  *      (which uses the attributes / metadata passed in on the open call).
199  *
200  *      O_SYNC is a reasonable match to the CIFS writethrough flag
201  *      and the read/write flags match reasonably.  O_LARGEFILE
202  *      is irrelevant because largefile support is always used
203  *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
204  *      O_FASYNC, O_NOFOLLOW and O_NONBLOCK need further investigation.
205  *********************************************************************/
206
207         disposition = cifs_get_disposition(f_flags);
208
209         /* BB pass O_SYNC flag through on file attributes .. BB */
210
211         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
212         if (!buf)
213                 return -ENOMEM;
214
215         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
216         if (f_flags & O_SYNC)
217                 create_options |= CREATE_WRITE_THROUGH;
218
219         if (f_flags & O_DIRECT)
220                 create_options |= CREATE_NO_BUFFER;
221
222         oparms.tcon = tcon;
223         oparms.cifs_sb = cifs_sb;
224         oparms.desired_access = desired_access;
225         oparms.create_options = cifs_create_options(cifs_sb, create_options);
226         oparms.disposition = disposition;
227         oparms.path = full_path;
228         oparms.fid = fid;
229         oparms.reconnect = false;
230
231         rc = server->ops->open(xid, &oparms, oplock, buf);
232
233         if (rc)
234                 goto out;
235
236         /* TODO: Add support for calling posix query info but with passing in fid */
237         if (tcon->unix_ext)
238                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
239                                               xid);
240         else
241                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
242                                          xid, fid);
243
244         if (rc) {
245                 server->ops->close(xid, tcon, fid);
246                 if (rc == -ESTALE)
247                         rc = -EOPENSTALE;
248         }
249
250 out:
251         kfree(buf);
252         return rc;
253 }
254
255 static bool
256 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
257 {
258         struct cifs_fid_locks *cur;
259         bool has_locks = false;
260
261         down_read(&cinode->lock_sem);
262         list_for_each_entry(cur, &cinode->llist, llist) {
263                 if (!list_empty(&cur->locks)) {
264                         has_locks = true;
265                         break;
266                 }
267         }
268         up_read(&cinode->lock_sem);
269         return has_locks;
270 }
271
272 void
273 cifs_down_write(struct rw_semaphore *sem)
274 {
275         while (!down_write_trylock(sem))
276                 msleep(10);
277 }
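/*
 * Note (illustrative): cifs_down_write() has down_write() semantics but
 * polls with a short sleep rather than blocking in the rwsem slow path.
 * Callers pair it with a plain up_write(), e.g. in cifs_lock_add():
 *
 *   cifs_down_write(&cinode->lock_sem);
 *   list_add_tail(&lock->llist, &cfile->llist->locks);
 *   up_write(&cinode->lock_sem);
 */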
278
279 static void cifsFileInfo_put_work(struct work_struct *work);
280
281 struct cifsFileInfo *
282 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
283                   struct tcon_link *tlink, __u32 oplock)
284 {
285         struct dentry *dentry = file_dentry(file);
286         struct inode *inode = d_inode(dentry);
287         struct cifsInodeInfo *cinode = CIFS_I(inode);
288         struct cifsFileInfo *cfile;
289         struct cifs_fid_locks *fdlocks;
290         struct cifs_tcon *tcon = tlink_tcon(tlink);
291         struct TCP_Server_Info *server = tcon->ses->server;
292
293         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
294         if (cfile == NULL)
295                 return cfile;
296
297         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
298         if (!fdlocks) {
299                 kfree(cfile);
300                 return NULL;
301         }
302
303         INIT_LIST_HEAD(&fdlocks->locks);
304         fdlocks->cfile = cfile;
305         cfile->llist = fdlocks;
306
307         cfile->count = 1;
308         cfile->pid = current->tgid;
309         cfile->uid = current_fsuid();
310         cfile->dentry = dget(dentry);
311         cfile->f_flags = file->f_flags;
312         cfile->invalidHandle = false;
313         cfile->deferred_close_scheduled = false;
314         cfile->tlink = cifs_get_tlink(tlink);
315         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
316         INIT_WORK(&cfile->put, cifsFileInfo_put_work);
317         INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
318         mutex_init(&cfile->fh_mutex);
319         spin_lock_init(&cfile->file_info_lock);
320
321         cifs_sb_active(inode->i_sb);
322
323         /*
324          * If the server returned a read oplock and we have mandatory brlocks,
325          * set oplock level to None.
326          */
327         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
328                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
329                 oplock = 0;
330         }
331
332         cifs_down_write(&cinode->lock_sem);
333         list_add(&fdlocks->llist, &cinode->llist);
334         up_write(&cinode->lock_sem);
335
336         spin_lock(&tcon->open_file_lock);
337         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
338                 oplock = fid->pending_open->oplock;
339         list_del(&fid->pending_open->olist);
340
341         fid->purge_cache = false;
342         server->ops->set_fid(cfile, fid, oplock);
343
344         list_add(&cfile->tlist, &tcon->openFileList);
345         atomic_inc(&tcon->num_local_opens);
346
347         /* if this is a readable file instance, put it first in the list */
348         spin_lock(&cinode->open_file_lock);
349         if (file->f_mode & FMODE_READ)
350                 list_add(&cfile->flist, &cinode->openFileList);
351         else
352                 list_add_tail(&cfile->flist, &cinode->openFileList);
353         spin_unlock(&cinode->open_file_lock);
354         spin_unlock(&tcon->open_file_lock);
355
356         if (fid->purge_cache)
357                 cifs_zap_mapping(inode);
358
359         file->private_data = cfile;
360         return cfile;
361 }
362
363 struct cifsFileInfo *
364 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
365 {
366         spin_lock(&cifs_file->file_info_lock);
367         cifsFileInfo_get_locked(cifs_file);
368         spin_unlock(&cifs_file->file_info_lock);
369         return cifs_file;
370 }
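/*
 * Note (illustrative): a cifsFileInfo is created with count == 1 in
 * cifs_new_fileinfo(); cifsFileInfo_get()/cifsFileInfo_put() then take and
 * drop references, and the final put tears the structure down:
 *
 *   cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);  // count == 1
 *   cifsFileInfo_get(cfile);                               // count == 2
 *   cifsFileInfo_put(cfile);                               // count == 1
 *   cifsFileInfo_put(cfile);                               // freed/closed
 */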
371
372 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
373 {
374         struct inode *inode = d_inode(cifs_file->dentry);
375         struct cifsInodeInfo *cifsi = CIFS_I(inode);
376         struct cifsLockInfo *li, *tmp;
377         struct super_block *sb = inode->i_sb;
378
379         cifs_fscache_release_inode_cookie(inode);
380
381         /*
382          * Delete any outstanding lock records. We'll lose them when the file
383          * is closed anyway.
384          */
385         cifs_down_write(&cifsi->lock_sem);
386         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
387                 list_del(&li->llist);
388                 cifs_del_lock_waiters(li);
389                 kfree(li);
390         }
391         list_del(&cifs_file->llist->llist);
392         kfree(cifs_file->llist);
393         up_write(&cifsi->lock_sem);
394
395         cifs_put_tlink(cifs_file->tlink);
396         dput(cifs_file->dentry);
397         cifs_sb_deactive(sb);
398         kfree(cifs_file);
399 }
400
401 static void cifsFileInfo_put_work(struct work_struct *work)
402 {
403         struct cifsFileInfo *cifs_file = container_of(work,
404                         struct cifsFileInfo, put);
405
406         cifsFileInfo_put_final(cifs_file);
407 }
408
409 /**
410  * cifsFileInfo_put - release a reference to file private data
411  *
412  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
413  *
414  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
415  */
416 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
417 {
418         _cifsFileInfo_put(cifs_file, true, true);
419 }
420
421 /**
422  * _cifsFileInfo_put - release a reference to file private data
423  *
424  * This may involve closing the filehandle @cifs_file out on the
425  * server. Must be called without holding tcon->open_file_lock,
426  * cinode->open_file_lock and cifs_file->file_info_lock.
427  *
428  * If @wait_for_oplock_handler is true and we are releasing the last
429  * reference, wait for any running oplock break handler of the file
430  * and cancel any pending one.
431  *
432  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
433  * @wait_oplock_handler: must be false if called from oplock_break_handler
434  * @offload:    if true, queue the final release to a workqueue; close and
435  *              oplock-break callers pass false
436  */
437 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
438                        bool wait_oplock_handler, bool offload)
439 {
440         struct inode *inode = d_inode(cifs_file->dentry);
441         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
442         struct TCP_Server_Info *server = tcon->ses->server;
443         struct cifsInodeInfo *cifsi = CIFS_I(inode);
444         struct super_block *sb = inode->i_sb;
445         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
446         struct cifs_fid fid;
447         struct cifs_pending_open open;
448         bool oplock_break_cancelled;
449
450         spin_lock(&tcon->open_file_lock);
451         spin_lock(&cifsi->open_file_lock);
452         spin_lock(&cifs_file->file_info_lock);
453         if (--cifs_file->count > 0) {
454                 spin_unlock(&cifs_file->file_info_lock);
455                 spin_unlock(&cifsi->open_file_lock);
456                 spin_unlock(&tcon->open_file_lock);
457                 return;
458         }
459         spin_unlock(&cifs_file->file_info_lock);
460
461         if (server->ops->get_lease_key)
462                 server->ops->get_lease_key(inode, &fid);
463
464         /* store open in pending opens to make sure we don't miss lease break */
465         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
466
467         /* remove it from the lists */
468         list_del(&cifs_file->flist);
469         list_del(&cifs_file->tlist);
470         atomic_dec(&tcon->num_local_opens);
471
472         if (list_empty(&cifsi->openFileList)) {
473                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
474                          d_inode(cifs_file->dentry));
475                 /*
476                  * In strict cache mode we need to invalidate the mapping on
477                  * the last close because it may cause an error when we open
478                  * this file again and get at least a level II oplock.
479                  */
480                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
481                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
482                 cifs_set_oplock_level(cifsi, 0);
483         }
484
485         spin_unlock(&cifsi->open_file_lock);
486         spin_unlock(&tcon->open_file_lock);
487
488         oplock_break_cancelled = wait_oplock_handler ?
489                 cancel_work_sync(&cifs_file->oplock_break) : false;
490
491         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
492                 struct TCP_Server_Info *server = tcon->ses->server;
493                 unsigned int xid;
494
495                 xid = get_xid();
496                 if (server->ops->close_getattr)
497                         server->ops->close_getattr(xid, tcon, cifs_file);
498                 else if (server->ops->close)
499                         server->ops->close(xid, tcon, &cifs_file->fid);
500                 _free_xid(xid);
501         }
502
503         if (oplock_break_cancelled)
504                 cifs_done_oplock_break(cifsi);
505
506         cifs_del_pending_open(&open);
507
508         if (offload)
509                 queue_work(fileinfo_put_wq, &cifs_file->put);
510         else
511                 cifsFileInfo_put_final(cifs_file);
512 }
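/*
 * Note (illustrative): the lock ordering used above is tcon->open_file_lock,
 * then cinode->open_file_lock, then cfile->file_info_lock. This is why
 * _cifsFileInfo_put() must be entered with none of them held, as its
 * kernel-doc states.
 */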
513
514 int cifs_open(struct inode *inode, struct file *file)
515
516 {
517         int rc = -EACCES;
518         unsigned int xid;
519         __u32 oplock;
520         struct cifs_sb_info *cifs_sb;
521         struct TCP_Server_Info *server;
522         struct cifs_tcon *tcon;
523         struct tcon_link *tlink;
524         struct cifsFileInfo *cfile = NULL;
525         void *page;
526         const char *full_path;
527         bool posix_open_ok = false;
528         struct cifs_fid fid;
529         struct cifs_pending_open open;
530
531         xid = get_xid();
532
533         cifs_sb = CIFS_SB(inode->i_sb);
534         if (unlikely(cifs_forced_shutdown(cifs_sb))) {
535                 free_xid(xid);
536                 return -EIO;
537         }
538
539         tlink = cifs_sb_tlink(cifs_sb);
540         if (IS_ERR(tlink)) {
541                 free_xid(xid);
542                 return PTR_ERR(tlink);
543         }
544         tcon = tlink_tcon(tlink);
545         server = tcon->ses->server;
546
547         page = alloc_dentry_path();
548         full_path = build_path_from_dentry(file_dentry(file), page);
549         if (IS_ERR(full_path)) {
550                 rc = PTR_ERR(full_path);
551                 goto out;
552         }
553
554         cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
555                  inode, file->f_flags, full_path);
556
557         if (file->f_flags & O_DIRECT &&
558             cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
559                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
560                         file->f_op = &cifs_file_direct_nobrl_ops;
561                 else
562                         file->f_op = &cifs_file_direct_ops;
563         }
564
565         /* Get the cached handle as SMB2 close is deferred */
566         rc = cifs_get_readable_path(tcon, full_path, &cfile);
567         if (rc == 0) {
568                 if (file->f_flags == cfile->f_flags) {
569                         file->private_data = cfile;
570                         spin_lock(&CIFS_I(inode)->deferred_lock);
571                         cifs_del_deferred_close(cfile);
572                         spin_unlock(&CIFS_I(inode)->deferred_lock);
573                         goto out;
574                 } else {
575                         _cifsFileInfo_put(cfile, true, false);
576                 }
577         }
578
579         if (server->oplocks)
580                 oplock = REQ_OPLOCK;
581         else
582                 oplock = 0;
583
584         if (!tcon->broken_posix_open && tcon->unix_ext &&
585             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
586                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
587                 /* can not refresh inode info since size could be stale */
588                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
589                                 cifs_sb->ctx->file_mode /* ignored */,
590                                 file->f_flags, &oplock, &fid.netfid, xid);
591                 if (rc == 0) {
592                         cifs_dbg(FYI, "posix open succeeded\n");
593                         posix_open_ok = true;
594                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
595                         if (tcon->ses->serverNOS)
596                                 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
597                                          tcon->ses->ip_addr,
598                                          tcon->ses->serverNOS);
599                         tcon->broken_posix_open = true;
600                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
601                          (rc != -EOPNOTSUPP)) /* path not found or net err */
602                         goto out;
603                 /*
604                  * Else fallthrough to retry open the old way on network i/o
605                  * or DFS errors.
606                  */
607         }
608
609         if (server->ops->get_lease_key)
610                 server->ops->get_lease_key(inode, &fid);
611
612         cifs_add_pending_open(&fid, tlink, &open);
613
614         if (!posix_open_ok) {
615                 if (server->ops->get_lease_key)
616                         server->ops->get_lease_key(inode, &fid);
617
618                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
619                                   file->f_flags, &oplock, &fid, xid);
620                 if (rc) {
621                         cifs_del_pending_open(&open);
622                         goto out;
623                 }
624         }
625
626         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
627         if (cfile == NULL) {
628                 if (server->ops->close)
629                         server->ops->close(xid, tcon, &fid);
630                 cifs_del_pending_open(&open);
631                 rc = -ENOMEM;
632                 goto out;
633         }
634
635         cifs_fscache_set_inode_cookie(inode, file);
636
637         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
638                 /*
639                  * Time to set the mode, which we could not set earlier due
640                  * to problems creating new read-only files.
641                  */
642                 struct cifs_unix_set_info_args args = {
643                         .mode   = inode->i_mode,
644                         .uid    = INVALID_UID, /* no change */
645                         .gid    = INVALID_GID, /* no change */
646                         .ctime  = NO_CHANGE_64,
647                         .atime  = NO_CHANGE_64,
648                         .mtime  = NO_CHANGE_64,
649                         .device = 0,
650                 };
651                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
652                                        cfile->pid);
653         }
654
655 out:
656         free_dentry_path(page);
657         free_xid(xid);
658         cifs_put_tlink(tlink);
659         return rc;
660 }
661
662 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
663
664 /*
665  * Try to reacquire byte range locks that were released when session
666  * to server was lost.
667  */
668 static int
669 cifs_relock_file(struct cifsFileInfo *cfile)
670 {
671         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
672         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
673         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
674         int rc = 0;
675
676         down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
677         if (cinode->can_cache_brlcks) {
678                 /* can cache locks - no need to relock */
679                 up_read(&cinode->lock_sem);
680                 return rc;
681         }
682
683         if (cap_unix(tcon->ses) &&
684             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
685             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
686                 rc = cifs_push_posix_locks(cfile);
687         else
688                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
689
690         up_read(&cinode->lock_sem);
691         return rc;
692 }
693
694 static int
695 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
696 {
697         int rc = -EACCES;
698         unsigned int xid;
699         __u32 oplock;
700         struct cifs_sb_info *cifs_sb;
701         struct cifs_tcon *tcon;
702         struct TCP_Server_Info *server;
703         struct cifsInodeInfo *cinode;
704         struct inode *inode;
705         void *page;
706         const char *full_path;
707         int desired_access;
708         int disposition = FILE_OPEN;
709         int create_options = CREATE_NOT_DIR;
710         struct cifs_open_parms oparms;
711
712         xid = get_xid();
713         mutex_lock(&cfile->fh_mutex);
714         if (!cfile->invalidHandle) {
715                 mutex_unlock(&cfile->fh_mutex);
716                 free_xid(xid);
717                 return 0;
718         }
719
720         inode = d_inode(cfile->dentry);
721         cifs_sb = CIFS_SB(inode->i_sb);
722         tcon = tlink_tcon(cfile->tlink);
723         server = tcon->ses->server;
724
725         /*
726          * Cannot grab the rename sem here: various ops, including some
727          * that already hold it, can end up causing writepage to get called,
728          * and if the server was down that means we end up here. We can
729          * never tell whether the caller already holds the rename_sem.
730          */
731         page = alloc_dentry_path();
732         full_path = build_path_from_dentry(cfile->dentry, page);
733         if (IS_ERR(full_path)) {
734                 mutex_unlock(&cfile->fh_mutex);
735                 free_dentry_path(page);
736                 free_xid(xid);
737                 return PTR_ERR(full_path);
738         }
739
740         cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
741                  inode, cfile->f_flags, full_path);
742
743         if (tcon->ses->server->oplocks)
744                 oplock = REQ_OPLOCK;
745         else
746                 oplock = 0;
747
748         if (tcon->unix_ext && cap_unix(tcon->ses) &&
749             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
750                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
751                 /*
752                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
753                  * original open. Must mask them off for a reopen.
754                  */
755                 unsigned int oflags = cfile->f_flags &
756                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
757
758                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
759                                      cifs_sb->ctx->file_mode /* ignored */,
760                                      oflags, &oplock, &cfile->fid.netfid, xid);
761                 if (rc == 0) {
762                         cifs_dbg(FYI, "posix reopen succeeded\n");
763                         oparms.reconnect = true;
764                         goto reopen_success;
765                 }
766                 /*
767                  * Fall through to retry the open the old way on errors;
768                  * especially in the reconnect path it is important to retry hard.
769                  */
770         }
771
772         desired_access = cifs_convert_flags(cfile->f_flags);
773
774         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
775         if (cfile->f_flags & O_SYNC)
776                 create_options |= CREATE_WRITE_THROUGH;
777
778         if (cfile->f_flags & O_DIRECT)
779                 create_options |= CREATE_NO_BUFFER;
780
781         if (server->ops->get_lease_key)
782                 server->ops->get_lease_key(inode, &cfile->fid);
783
784         oparms.tcon = tcon;
785         oparms.cifs_sb = cifs_sb;
786         oparms.desired_access = desired_access;
787         oparms.create_options = cifs_create_options(cifs_sb, create_options);
788         oparms.disposition = disposition;
789         oparms.path = full_path;
790         oparms.fid = &cfile->fid;
791         oparms.reconnect = true;
792
793         /*
794          * Can not refresh inode by passing in file_info buf to be returned by
795          * ops->open and then calling get_inode_info with returned buf since
796          * file might have write behind data that needs to be flushed and server
797          * version of file size can be stale. If we knew for sure that inode was
798          * not dirty locally we could do this.
799          */
800         rc = server->ops->open(xid, &oparms, &oplock, NULL);
801         if (rc == -ENOENT && oparms.reconnect == false) {
802                 /* durable handle timeout is expired - open the file again */
803                 rc = server->ops->open(xid, &oparms, &oplock, NULL);
804                 /* indicate that we need to relock the file */
805                 oparms.reconnect = true;
806         }
807
808         if (rc) {
809                 mutex_unlock(&cfile->fh_mutex);
810                 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
811                 cifs_dbg(FYI, "oplock: %d\n", oplock);
812                 goto reopen_error_exit;
813         }
814
815 reopen_success:
816         cfile->invalidHandle = false;
817         mutex_unlock(&cfile->fh_mutex);
818         cinode = CIFS_I(inode);
819
820         if (can_flush) {
821                 rc = filemap_write_and_wait(inode->i_mapping);
822                 if (!is_interrupt_error(rc))
823                         mapping_set_error(inode->i_mapping, rc);
824
825                 if (tcon->posix_extensions)
826                         rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
827                 else if (tcon->unix_ext)
828                         rc = cifs_get_inode_info_unix(&inode, full_path,
829                                                       inode->i_sb, xid);
830                 else
831                         rc = cifs_get_inode_info(&inode, full_path, NULL,
832                                                  inode->i_sb, xid, NULL);
833         }
834         /*
835          * Else we are already writing out data to the server and could
836          * deadlock if we tried to flush it; and since we do not know whether
837          * we have data that would invalidate the current end of file on the
838          * server, we cannot go to the server to get the new inode info.
839          */
840
841         /*
842          * If the server returned a read oplock and we have mandatory brlocks,
843          * set oplock level to None.
844          */
845         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
846                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
847                 oplock = 0;
848         }
849
850         server->ops->set_fid(cfile, &cfile->fid, oplock);
851         if (oparms.reconnect)
852                 cifs_relock_file(cfile);
853
854 reopen_error_exit:
855         free_dentry_path(page);
856         free_xid(xid);
857         return rc;
858 }
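/*
 * Note (illustrative): oparms.reconnect tells the transport-specific
 * ->open() that an existing handle is being re-established after a session
 * loss; when it is still true after a successful reopen, cifs_relock_file()
 * re-pushes any byte-range locks the server may have lost.
 */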
859
860 void smb2_deferred_work_close(struct work_struct *work)
861 {
862         struct cifsFileInfo *cfile = container_of(work,
863                         struct cifsFileInfo, deferred.work);
864
865         spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
866         cifs_del_deferred_close(cfile);
867         cfile->deferred_close_scheduled = false;
868         spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
869         _cifsFileInfo_put(cfile, true, false);
870 }
871
872 int cifs_close(struct inode *inode, struct file *file)
873 {
874         struct cifsFileInfo *cfile;
875         struct cifsInodeInfo *cinode = CIFS_I(inode);
876         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
877         struct cifs_deferred_close *dclose;
878
879         if (file->private_data != NULL) {
880                 cfile = file->private_data;
881                 file->private_data = NULL;
882                 dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
883                 if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
884                     cinode->lease_granted &&
885                     !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
886                     dclose) {
887                         if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
888                                 inode->i_ctime = inode->i_mtime = current_time(inode);
889                                 cifs_fscache_update_inode_cookie(inode);
890                         }
891                         spin_lock(&cinode->deferred_lock);
892                         cifs_add_deferred_close(cfile, dclose);
893                         if (cfile->deferred_close_scheduled &&
894                             delayed_work_pending(&cfile->deferred)) {
895                                 /*
896                                  * If there is no pending work, mod_delayed_work queues new work.
897                                  * So, increase the ref count to avoid a use-after-free.
898                                  */
899                                 if (!mod_delayed_work(deferredclose_wq,
900                                                 &cfile->deferred, cifs_sb->ctx->acregmax))
901                                         cifsFileInfo_get(cfile);
902                         } else {
903                                 /* Deferred close for files */
904                                 queue_delayed_work(deferredclose_wq,
905                                                 &cfile->deferred, cifs_sb->ctx->acregmax);
906                                 cfile->deferred_close_scheduled = true;
907                                 spin_unlock(&cinode->deferred_lock);
908                                 return 0;
909                         }
910                         spin_unlock(&cinode->deferred_lock);
911                         _cifsFileInfo_put(cfile, true, false);
912                 } else {
913                         _cifsFileInfo_put(cfile, true, false);
914                         kfree(dclose);
915                 }
916         }
917
918         /* return code from the ->release op is always ignored */
919         return 0;
920 }
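/*
 * Note (illustrative): with a granted read-write-handle (RHW) lease the
 * handle is not closed immediately above; the real close runs from
 * smb2_deferred_work_close() after ctx->acregmax, unless a new open of the
 * same path reclaims the cached handle via cifs_get_readable_path() in
 * cifs_open().
 */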
921
922 void
923 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
924 {
925         struct cifsFileInfo *open_file;
926         struct list_head *tmp;
927         struct list_head *tmp1;
928         struct list_head tmp_list;
929
930         if (!tcon->use_persistent || !tcon->need_reopen_files)
931                 return;
932
933         tcon->need_reopen_files = false;
934
935         cifs_dbg(FYI, "Reopen persistent handles\n");
936         INIT_LIST_HEAD(&tmp_list);
937
938         /* list all files open on tree connection, reopen resilient handles  */
939         spin_lock(&tcon->open_file_lock);
940         list_for_each(tmp, &tcon->openFileList) {
941                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
942                 if (!open_file->invalidHandle)
943                         continue;
944                 cifsFileInfo_get(open_file);
945                 list_add_tail(&open_file->rlist, &tmp_list);
946         }
947         spin_unlock(&tcon->open_file_lock);
948
949         list_for_each_safe(tmp, tmp1, &tmp_list) {
950                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
951                 if (cifs_reopen_file(open_file, false /* do not flush */))
952                         tcon->need_reopen_files = true;
953                 list_del_init(&open_file->rlist);
954                 cifsFileInfo_put(open_file);
955         }
956 }
957
958 int cifs_closedir(struct inode *inode, struct file *file)
959 {
960         int rc = 0;
961         unsigned int xid;
962         struct cifsFileInfo *cfile = file->private_data;
963         struct cifs_tcon *tcon;
964         struct TCP_Server_Info *server;
965         char *buf;
966
967         cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
968
969         if (cfile == NULL)
970                 return rc;
971
972         xid = get_xid();
973         tcon = tlink_tcon(cfile->tlink);
974         server = tcon->ses->server;
975
976         cifs_dbg(FYI, "Freeing private data in close dir\n");
977         spin_lock(&cfile->file_info_lock);
978         if (server->ops->dir_needs_close(cfile)) {
979                 cfile->invalidHandle = true;
980                 spin_unlock(&cfile->file_info_lock);
981                 if (server->ops->close_dir)
982                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
983                 else
984                         rc = -ENOSYS;
985                 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
986                 /* not much we can do if it fails anyway, ignore rc */
987                 rc = 0;
988         } else
989                 spin_unlock(&cfile->file_info_lock);
990
991         buf = cfile->srch_inf.ntwrk_buf_start;
992         if (buf) {
993                 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
994                 cfile->srch_inf.ntwrk_buf_start = NULL;
995                 if (cfile->srch_inf.smallBuf)
996                         cifs_small_buf_release(buf);
997                 else
998                         cifs_buf_release(buf);
999         }
1000
1001         cifs_put_tlink(cfile->tlink);
1002         kfree(file->private_data);
1003         file->private_data = NULL;
1004         /* BB can we lock the filestruct while this is going on? */
1005         free_xid(xid);
1006         return rc;
1007 }
1008
1009 static struct cifsLockInfo *
1010 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1011 {
1012         struct cifsLockInfo *lock =
1013                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1014         if (!lock)
1015                 return lock;
1016         lock->offset = offset;
1017         lock->length = length;
1018         lock->type = type;
1019         lock->pid = current->tgid;
1020         lock->flags = flags;
1021         INIT_LIST_HEAD(&lock->blist);
1022         init_waitqueue_head(&lock->block_q);
1023         return lock;
1024 }
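/*
 * Sketch (illustrative, local variable names hypothetical): callers allocate
 * a lock record and then either cache it or check it against existing locks:
 *
 *   struct cifsLockInfo *lock;
 *
 *   lock = cifs_lock_init(flock->fl_start, length, type, flock->fl_flags);
 *   if (!lock)
 *           return -ENOMEM;
 *   rc = cifs_lock_add_if(cfile, lock, wait_flag);
 */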
1025
1026 void
1027 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1028 {
1029         struct cifsLockInfo *li, *tmp;
1030         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1031                 list_del_init(&li->blist);
1032                 wake_up(&li->block_q);
1033         }
1034 }
1035
1036 #define CIFS_LOCK_OP    0
1037 #define CIFS_READ_OP    1
1038 #define CIFS_WRITE_OP   2
1039
1040 /* @rw_check: CIFS_LOCK_OP (0) - lock op, CIFS_READ_OP (1) - read, CIFS_WRITE_OP (2) - write */
1041 static bool
1042 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1043                             __u64 length, __u8 type, __u16 flags,
1044                             struct cifsFileInfo *cfile,
1045                             struct cifsLockInfo **conf_lock, int rw_check)
1046 {
1047         struct cifsLockInfo *li;
1048         struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1049         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1050
1051         list_for_each_entry(li, &fdlocks->locks, llist) {
1052                 if (offset + length <= li->offset ||
1053                     offset >= li->offset + li->length)
1054                         continue;
1055                 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1056                     server->ops->compare_fids(cfile, cur_cfile)) {
1057                         /* shared lock prevents write op through the same fid */
1058                         if (!(li->type & server->vals->shared_lock_type) ||
1059                             rw_check != CIFS_WRITE_OP)
1060                                 continue;
1061                 }
1062                 if ((type & server->vals->shared_lock_type) &&
1063                     ((server->ops->compare_fids(cfile, cur_cfile) &&
1064                      current->tgid == li->pid) || type == li->type))
1065                         continue;
1066                 if (rw_check == CIFS_LOCK_OP &&
1067                     (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1068                     server->ops->compare_fids(cfile, cur_cfile))
1069                         continue;
1070                 if (conf_lock)
1071                         *conf_lock = li;
1072                 return true;
1073         }
1074         return false;
1075 }
1076
1077 bool
1078 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1079                         __u8 type, __u16 flags,
1080                         struct cifsLockInfo **conf_lock, int rw_check)
1081 {
1082         bool rc = false;
1083         struct cifs_fid_locks *cur;
1084         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1085
1086         list_for_each_entry(cur, &cinode->llist, llist) {
1087                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1088                                                  flags, cfile, conf_lock,
1089                                                  rw_check);
1090                 if (rc)
1091                         break;
1092         }
1093
1094         return rc;
1095 }
1096
1097 /*
1098  * Check if there is another lock that prevents us from setting the lock
1099  * (mandatory style). If such a lock exists, update the flock structure with
1100  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1101  * brlocks, or leave it unchanged if we can't. Returns 0 if we don't need to
1102  * request the lock from the server, or 1 otherwise.
1103  */
1104 static int
1105 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1106                __u8 type, struct file_lock *flock)
1107 {
1108         int rc = 0;
1109         struct cifsLockInfo *conf_lock;
1110         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1111         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1112         bool exist;
1113
1114         down_read(&cinode->lock_sem);
1115
1116         exist = cifs_find_lock_conflict(cfile, offset, length, type,
1117                                         flock->fl_flags, &conf_lock,
1118                                         CIFS_LOCK_OP);
1119         if (exist) {
1120                 flock->fl_start = conf_lock->offset;
1121                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1122                 flock->fl_pid = conf_lock->pid;
1123                 if (conf_lock->type & server->vals->shared_lock_type)
1124                         flock->fl_type = F_RDLCK;
1125                 else
1126                         flock->fl_type = F_WRLCK;
1127         } else if (!cinode->can_cache_brlcks)
1128                 rc = 1;
1129         else
1130                 flock->fl_type = F_UNLCK;
1131
1132         up_read(&cinode->lock_sem);
1133         return rc;
1134 }
1135
1136 static void
1137 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1138 {
1139         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1140         cifs_down_write(&cinode->lock_sem);
1141         list_add_tail(&lock->llist, &cfile->llist->locks);
1142         up_write(&cinode->lock_sem);
1143 }
1144
1145 /*
1146  * Set the byte-range lock (mandatory style). Returns:
1147  * 1) 0, if we set the lock and don't need to send it to the server;
1148  * 2) 1, if no locks prevent us but we need to request the lock from the server;
1149  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1150  */
1151 static int
1152 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1153                  bool wait)
1154 {
1155         struct cifsLockInfo *conf_lock;
1156         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1157         bool exist;
1158         int rc = 0;
1159
1160 try_again:
1161         exist = false;
1162         cifs_down_write(&cinode->lock_sem);
1163
1164         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1165                                         lock->type, lock->flags, &conf_lock,
1166                                         CIFS_LOCK_OP);
1167         if (!exist && cinode->can_cache_brlcks) {
1168                 list_add_tail(&lock->llist, &cfile->llist->locks);
1169                 up_write(&cinode->lock_sem);
1170                 return rc;
1171         }
1172
1173         if (!exist)
1174                 rc = 1;
1175         else if (!wait)
1176                 rc = -EACCES;
1177         else {
1178                 list_add_tail(&lock->blist, &conf_lock->blist);
1179                 up_write(&cinode->lock_sem);
1180                 rc = wait_event_interruptible(lock->block_q,
1181                                         (lock->blist.prev == &lock->blist) &&
1182                                         (lock->blist.next == &lock->blist));
1183                 if (!rc)
1184                         goto try_again;
1185                 cifs_down_write(&cinode->lock_sem);
1186                 list_del_init(&lock->blist);
1187         }
1188
1189         up_write(&cinode->lock_sem);
1190         return rc;
1191 }
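/*
 * Note (illustrative): the wait above uses the emptiness of lock->blist as
 * its wakeup condition; cifs_del_lock_waiters() does list_del_init() on each
 * waiter and wakes its block_q, which makes the condition true and sends the
 * waiter back to try_again.
 */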
1192
1193 /*
1194  * Check if there is another lock that prevents us from setting the lock
1195  * (posix style). If such a lock exists, update the flock structure with
1196  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1197  * brlocks, or leave it unchanged if we can't. Returns 0 if we don't need to
1198  * request the lock from the server, or 1 otherwise.
1199  */
1200 static int
1201 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1202 {
1203         int rc = 0;
1204         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1205         unsigned char saved_type = flock->fl_type;
1206
1207         if ((flock->fl_flags & FL_POSIX) == 0)
1208                 return 1;
1209
1210         down_read(&cinode->lock_sem);
1211         posix_test_lock(file, flock);
1212
1213         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1214                 flock->fl_type = saved_type;
1215                 rc = 1;
1216         }
1217
1218         up_read(&cinode->lock_sem);
1219         return rc;
1220 }
1221
1222 /*
1223  * Set the byte-range lock (posix style). Returns:
1224  * 1) <0, if the error occurs while setting the lock;
1225  * 2) 0, if we set the lock and don't need to request to the server;
1226  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1227  * 4) FILE_LOCK_DEFERRED + 1, if we need to request the lock from the server.
1228  */
1229 static int
1230 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1231 {
1232         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1233         int rc = FILE_LOCK_DEFERRED + 1;
1234
1235         if ((flock->fl_flags & FL_POSIX) == 0)
1236                 return rc;
1237
1238         cifs_down_write(&cinode->lock_sem);
1239         if (!cinode->can_cache_brlcks) {
1240                 up_write(&cinode->lock_sem);
1241                 return rc;
1242         }
1243
1244         rc = posix_lock_file(file, flock, NULL);
1245         up_write(&cinode->lock_sem);
1246         return rc;
1247 }
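/*
 * Note (illustrative): the FILE_LOCK_DEFERRED + 1 return value lets callers
 * such as cifs_setlk() (later in this file) distinguish "handled entirely
 * client-side" (rc <= FILE_LOCK_DEFERRED) from "the lock must also be
 * requested from the server".
 */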
1248
1249 int
1250 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1251 {
1252         unsigned int xid;
1253         int rc = 0, stored_rc;
1254         struct cifsLockInfo *li, *tmp;
1255         struct cifs_tcon *tcon;
1256         unsigned int num, max_num, max_buf;
1257         LOCKING_ANDX_RANGE *buf, *cur;
1258         static const int types[] = {
1259                 LOCKING_ANDX_LARGE_FILES,
1260                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1261         };
1262         int i;
1263
1264         xid = get_xid();
1265         tcon = tlink_tcon(cfile->tlink);
1266
1267         /*
1268          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1269          * and check it before using.
1270          */
1271         max_buf = tcon->ses->server->maxBuf;
1272         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1273                 free_xid(xid);
1274                 return -EINVAL;
1275         }
1276
1277         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1278                      PAGE_SIZE);
1279         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1280                         PAGE_SIZE);
1281         max_num = (max_buf - sizeof(struct smb_hdr)) /
1282                                                 sizeof(LOCKING_ANDX_RANGE);
1283         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1284         if (!buf) {
1285                 free_xid(xid);
1286                 return -ENOMEM;
1287         }
1288
1289         for (i = 0; i < 2; i++) {
1290                 cur = buf;
1291                 num = 0;
1292                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1293                         if (li->type != types[i])
1294                                 continue;
1295                         cur->Pid = cpu_to_le16(li->pid);
1296                         cur->LengthLow = cpu_to_le32((u32)li->length);
1297                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1298                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1299                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1300                         if (++num == max_num) {
1301                                 stored_rc = cifs_lockv(xid, tcon,
1302                                                        cfile->fid.netfid,
1303                                                        (__u8)li->type, 0, num,
1304                                                        buf);
1305                                 if (stored_rc)
1306                                         rc = stored_rc;
1307                                 cur = buf;
1308                                 num = 0;
1309                         } else
1310                                 cur++;
1311                 }
1312
1313                 if (num) {
1314                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1315                                                (__u8)types[i], 0, num, buf);
1316                         if (stored_rc)
1317                                 rc = stored_rc;
1318                 }
1319         }
1320
1321         kfree(buf);
1322         free_xid(xid);
1323         return rc;
1324 }
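/*
 * Sketch (illustrative): with e.g. max_buf == 16644, a common CIFS
 * MaxBufferSize, the sizing above becomes
 *
 *   max_buf = min(16644 - sizeof(struct smb_hdr), PAGE_SIZE);
 *   max_num = (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE);
 *
 * so cached locks are pushed in LOCKING_ANDX batches of at most max_num
 * ranges per call to cifs_lockv().
 */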
1325
1326 static __u32
1327 hash_lockowner(fl_owner_t owner)
1328 {
1329         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1330 }
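/*
 * Note (illustrative): fl_owner_t values are pointers into kernel memory, so
 * they are mixed with a random secret (cifs_lock_secret) before being used
 * as POSIX lock "pids" on the wire, instead of leaking raw kernel addresses.
 */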
1331
1332 struct lock_to_push {
1333         struct list_head llist;
1334         __u64 offset;
1335         __u64 length;
1336         __u32 pid;
1337         __u16 netfid;
1338         __u8 type;
1339 };
1340
1341 static int
1342 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1343 {
1344         struct inode *inode = d_inode(cfile->dentry);
1345         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1346         struct file_lock *flock;
1347         struct file_lock_context *flctx = inode->i_flctx;
1348         unsigned int count = 0, i;
1349         int rc = 0, xid, type;
1350         struct list_head locks_to_send, *el;
1351         struct lock_to_push *lck, *tmp;
1352         __u64 length;
1353
1354         xid = get_xid();
1355
1356         if (!flctx)
1357                 goto out;
1358
1359         spin_lock(&flctx->flc_lock);
1360         list_for_each(el, &flctx->flc_posix) {
1361                 count++;
1362         }
1363         spin_unlock(&flctx->flc_lock);
1364
1365         INIT_LIST_HEAD(&locks_to_send);
1366
1367         /*
1368          * Allocating count locks is enough because no FL_POSIX locks can be
1369          * added to the list while we are holding cinode->lock_sem, which
1370          * protects the locking operations on this inode.
1371          */
1372         for (i = 0; i < count; i++) {
1373                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1374                 if (!lck) {
1375                         rc = -ENOMEM;
1376                         goto err_out;
1377                 }
1378                 list_add_tail(&lck->llist, &locks_to_send);
1379         }
1380
1381         el = locks_to_send.next;
1382         spin_lock(&flctx->flc_lock);
1383         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1384                 if (el == &locks_to_send) {
1385                         /*
1386                          * The list ended. We don't have enough allocated
1387                          * structures - something is really wrong.
1388                          */
1389                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1390                         break;
1391                 }
1392                 length = 1 + flock->fl_end - flock->fl_start;
1393                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1394                         type = CIFS_RDLCK;
1395                 else
1396                         type = CIFS_WRLCK;
1397                 lck = list_entry(el, struct lock_to_push, llist);
1398                 lck->pid = hash_lockowner(flock->fl_owner);
1399                 lck->netfid = cfile->fid.netfid;
1400                 lck->length = length;
1401                 lck->type = type;
1402                 lck->offset = flock->fl_start;
1403         }
1404         spin_unlock(&flctx->flc_lock);
1405
1406         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1407                 int stored_rc;
1408
1409                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1410                                              lck->offset, lck->length, NULL,
1411                                              lck->type, 0);
1412                 if (stored_rc)
1413                         rc = stored_rc;
1414                 list_del(&lck->llist);
1415                 kfree(lck);
1416         }
1417
1418 out:
1419         free_xid(xid);
1420         return rc;
1421 err_out:
1422         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1423                 list_del(&lck->llist);
1424                 kfree(lck);
1425         }
1426         goto out;
1427 }
1428
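/*
 * Replay the byte-range locks cached on this inode to the server.
 * This runs once locks can no longer be cached locally - note how
 * can_cache_brlcks is cleared under lock_sem below so the push happens
 * exactly once.  POSIX (unix extensions) locks and mandatory locks use
 * different wire operations, hence the two paths.
 */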
1429 static int
1430 cifs_push_locks(struct cifsFileInfo *cfile)
1431 {
1432         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1433         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1434         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1435         int rc = 0;
1436
1437         /* we are going to update can_cache_brlcks here - need write access */
1438         cifs_down_write(&cinode->lock_sem);
1439         if (!cinode->can_cache_brlcks) {
1440                 up_write(&cinode->lock_sem);
1441                 return rc;
1442         }
1443
1444         if (cap_unix(tcon->ses) &&
1445             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1446             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1447                 rc = cifs_push_posix_locks(cfile);
1448         else
1449                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1450
1451         cinode->can_cache_brlcks = false;
1452         up_write(&cinode->lock_sem);
1453         return rc;
1454 }
1455
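/*
 * Decode a VFS file_lock into this dialect's lock type bits and work
 * out whether the request is a lock or an unlock, and whether the
 * caller is prepared to block (FL_SLEEP).
 */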
1456 static void
1457 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1458                 bool *wait_flag, struct TCP_Server_Info *server)
1459 {
1460         if (flock->fl_flags & FL_POSIX)
1461                 cifs_dbg(FYI, "Posix\n");
1462         if (flock->fl_flags & FL_FLOCK)
1463                 cifs_dbg(FYI, "Flock\n");
1464         if (flock->fl_flags & FL_SLEEP) {
1465                 cifs_dbg(FYI, "Blocking lock\n");
1466                 *wait_flag = true;
1467         }
1468         if (flock->fl_flags & FL_ACCESS)
1469                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1470         if (flock->fl_flags & FL_LEASE)
1471                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1472         if (flock->fl_flags &
1473             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1474                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1475                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1476
1477         *type = server->vals->large_lock_type;
1478         if (flock->fl_type == F_WRLCK) {
1479                 cifs_dbg(FYI, "F_WRLCK\n");
1480                 *type |= server->vals->exclusive_lock_type;
1481                 *lock = 1;
1482         } else if (flock->fl_type == F_UNLCK) {
1483                 cifs_dbg(FYI, "F_UNLCK\n");
1484                 *type |= server->vals->unlock_lock_type;
1485                 *unlock = 1;
1486                 /* Check if unlock includes more than one lock range */
1487         } else if (flock->fl_type == F_RDLCK) {
1488                 cifs_dbg(FYI, "F_RDLCK\n");
1489                 *type |= server->vals->shared_lock_type;
1490                 *lock = 1;
1491         } else if (flock->fl_type == F_EXLCK) {
1492                 cifs_dbg(FYI, "F_EXLCK\n");
1493                 *type |= server->vals->exclusive_lock_type;
1494                 *lock = 1;
1495         } else if (flock->fl_type == F_SHLCK) {
1496                 cifs_dbg(FYI, "F_SHLCK\n");
1497                 *type |= server->vals->shared_lock_type;
1498                 *lock = 1;
1499         } else
1500                 cifs_dbg(FYI, "Unknown type of lock\n");
1501 }
1502
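/*
 * Handle F_GETLK.  Without POSIX lock support there is no way to ask
 * the server about a conflicting lock directly, so probe instead: try
 * to take the lock (unlocking again at once if that worked).  If the
 * exclusive probe fails, retry with a shared lock to report whether
 * F_RDLCK would have succeeded.
 */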
1503 static int
1504 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1505            bool wait_flag, bool posix_lck, unsigned int xid)
1506 {
1507         int rc = 0;
1508         __u64 length = 1 + flock->fl_end - flock->fl_start;
1509         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1510         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1511         struct TCP_Server_Info *server = tcon->ses->server;
1512         __u16 netfid = cfile->fid.netfid;
1513
1514         if (posix_lck) {
1515                 int posix_lock_type;
1516
1517                 rc = cifs_posix_lock_test(file, flock);
1518                 if (!rc)
1519                         return rc;
1520
1521                 if (type & server->vals->shared_lock_type)
1522                         posix_lock_type = CIFS_RDLCK;
1523                 else
1524                         posix_lock_type = CIFS_WRLCK;
1525                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1526                                       hash_lockowner(flock->fl_owner),
1527                                       flock->fl_start, length, flock,
1528                                       posix_lock_type, wait_flag);
1529                 return rc;
1530         }
1531
1532         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1533         if (!rc)
1534                 return rc;
1535
1536         /* BB we could chain these into one lock request BB */
1537         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1538                                     1, 0, false);
1539         if (rc == 0) {
1540                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1541                                             type, 0, 1, false);
1542                 flock->fl_type = F_UNLCK;
1543                 if (rc != 0)
1544                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1545                                  rc);
1546                 return 0;
1547         }
1548
1549         if (type & server->vals->shared_lock_type) {
1550                 flock->fl_type = F_WRLCK;
1551                 return 0;
1552         }
1553
1554         type &= ~server->vals->exclusive_lock_type;
1555
1556         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1557                                     type | server->vals->shared_lock_type,
1558                                     1, 0, false);
1559         if (rc == 0) {
1560                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1561                         type | server->vals->shared_lock_type, 0, 1, false);
1562                 flock->fl_type = F_RDLCK;
1563                 if (rc != 0)
1564                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1565                                  rc);
1566         } else
1567                 flock->fl_type = F_WRLCK;
1568
1569         return 0;
1570 }
1571
1572 void
1573 cifs_move_llist(struct list_head *source, struct list_head *dest)
1574 {
1575         struct list_head *li, *tmp;
1576         list_for_each_safe(li, tmp, source)
1577                 list_move(li, dest);
1578 }
1579
1580 void
1581 cifs_free_llist(struct list_head *llist)
1582 {
1583         struct cifsLockInfo *li, *tmp;
1584         list_for_each_entry_safe(li, tmp, llist, llist) {
1585                 cifs_del_lock_waiters(li);
1586                 list_del(&li->llist);
1587                 kfree(li);
1588         }
1589 }
1590
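/*
 * Unlock every cached range that falls entirely inside the range of
 * flock for this fid.  Ranges are batched into LOCKING_ANDX arrays,
 * one pass per lock type, with at most max_num entries per request as
 * bounded by the server's maxBuf.  Entries are parked on tmp_llist
 * while a request is in flight so they can be re-added to the file's
 * list if the server rejects the unlock.
 */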
1591 int
1592 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1593                   unsigned int xid)
1594 {
1595         int rc = 0, stored_rc;
1596         static const int types[] = {
1597                 LOCKING_ANDX_LARGE_FILES,
1598                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1599         };
1600         unsigned int i;
1601         unsigned int max_num, num, max_buf;
1602         LOCKING_ANDX_RANGE *buf, *cur;
1603         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1604         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1605         struct cifsLockInfo *li, *tmp;
1606         __u64 length = 1 + flock->fl_end - flock->fl_start;
1607         struct list_head tmp_llist;
1608
1609         INIT_LIST_HEAD(&tmp_llist);
1610
1611         /*
1612          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1613          * and check it before using.
1614          */
1615         max_buf = tcon->ses->server->maxBuf;
1616         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1617                 return -EINVAL;
1618
1619         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1620                      PAGE_SIZE);
1621         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1622                         PAGE_SIZE);
1623         max_num = (max_buf - sizeof(struct smb_hdr)) /
1624                                                 sizeof(LOCKING_ANDX_RANGE);
1625         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1626         if (!buf)
1627                 return -ENOMEM;
1628
1629         cifs_down_write(&cinode->lock_sem);
1630         for (i = 0; i < 2; i++) {
1631                 cur = buf;
1632                 num = 0;
1633                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1634                         if (flock->fl_start > li->offset ||
1635                             (flock->fl_start + length) <
1636                             (li->offset + li->length))
1637                                 continue;
1638                         if (current->tgid != li->pid)
1639                                 continue;
1640                         if (types[i] != li->type)
1641                                 continue;
1642                         if (cinode->can_cache_brlcks) {
1643                                 /*
1644                                  * We can cache brlock requests - simply remove
1645                                  * a lock from the file's list.
1646                                  */
1647                                 list_del(&li->llist);
1648                                 cifs_del_lock_waiters(li);
1649                                 kfree(li);
1650                                 continue;
1651                         }
1652                         cur->Pid = cpu_to_le16(li->pid);
1653                         cur->LengthLow = cpu_to_le32((u32)li->length);
1654                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1655                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1656                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1657                         /*
1658                          * We need to save a lock here to let us add it again to
1659                          * the file's list if the unlock range request fails on
1660                          * the server.
1661                          */
1662                         list_move(&li->llist, &tmp_llist);
1663                         if (++num == max_num) {
1664                                 stored_rc = cifs_lockv(xid, tcon,
1665                                                        cfile->fid.netfid,
1666                                                        li->type, num, 0, buf);
1667                                 if (stored_rc) {
1668                                         /*
1669                                          * We failed on the unlock range
1670                                          * request - add all locks from the tmp
1671                                          * list to the head of the file's list.
1672                                          */
1673                                         cifs_move_llist(&tmp_llist,
1674                                                         &cfile->llist->locks);
1675                                         rc = stored_rc;
1676                                 } else
1677                                         /*
1678                                          * The unlock range request succeeded -
1679                                          * free the tmp list.
1680                                          */
1681                                         cifs_free_llist(&tmp_llist);
1682                                 cur = buf;
1683                                 num = 0;
1684                         } else
1685                                 cur++;
1686                 }
1687                 if (num) {
1688                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1689                                                types[i], num, 0, buf);
1690                         if (stored_rc) {
1691                                 cifs_move_llist(&tmp_llist,
1692                                                 &cfile->llist->locks);
1693                                 rc = stored_rc;
1694                         } else
1695                                 cifs_free_llist(&tmp_llist);
1696                 }
1697         }
1698
1699         up_write(&cinode->lock_sem);
1700         kfree(buf);
1701         return rc;
1702 }
1703
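/*
 * Handle F_SETLK/F_SETLKW.  On the POSIX path cifs_posix_lock_set()
 * first tries to satisfy the request from the local lock cache and
 * only falls through to a server call when it cannot.  On the
 * mandatory path the lock is added to the local list once the server
 * has granted it (or immediately, while locks are still cacheable).
 */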
1704 static int
1705 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1706            bool wait_flag, bool posix_lck, int lock, int unlock,
1707            unsigned int xid)
1708 {
1709         int rc = 0;
1710         __u64 length = 1 + flock->fl_end - flock->fl_start;
1711         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1712         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1713         struct TCP_Server_Info *server = tcon->ses->server;
1714         struct inode *inode = d_inode(cfile->dentry);
1715
1716         if (posix_lck) {
1717                 int posix_lock_type;
1718
1719                 rc = cifs_posix_lock_set(file, flock);
1720                 if (rc <= FILE_LOCK_DEFERRED)
1721                         return rc;
1722
1723                 if (type & server->vals->shared_lock_type)
1724                         posix_lock_type = CIFS_RDLCK;
1725                 else
1726                         posix_lock_type = CIFS_WRLCK;
1727
1728                 if (unlock == 1)
1729                         posix_lock_type = CIFS_UNLCK;
1730
1731                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1732                                       hash_lockowner(flock->fl_owner),
1733                                       flock->fl_start, length,
1734                                       NULL, posix_lock_type, wait_flag);
1735                 goto out;
1736         }
1737
1738         if (lock) {
1739                 struct cifsLockInfo *lock;
1740
1741                 lock = cifs_lock_init(flock->fl_start, length, type,
1742                                       flock->fl_flags);
1743                 if (!lock)
1744                         return -ENOMEM;
1745
1746                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1747                 if (rc < 0) {
1748                         kfree(lock);
1749                         return rc;
1750                 }
1751                 if (!rc)
1752                         goto out;
1753
1754                 /*
1755                  * Windows 7 server can delay breaking lease from read to None
1756                  * if we set a byte-range lock on a file - break it explicitly
1757                  * before sending the lock to the server to be sure the next
1758                  * read won't conflict with non-overlapping locks due to
1759                  * page reading.
1760                  */
1761                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1762                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1763                         cifs_zap_mapping(inode);
1764                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1765                                  inode);
1766                         CIFS_I(inode)->oplock = 0;
1767                 }
1768
1769                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1770                                             type, 1, 0, wait_flag);
1771                 if (rc) {
1772                         kfree(lock);
1773                         return rc;
1774                 }
1775
1776                 cifs_lock_add(cfile, lock);
1777         } else if (unlock)
1778                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1779
1780 out:
1781         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1782                 /*
1783                  * If this is a request to remove all locks because we
1784                  * are closing the file, it doesn't matter if the
1785                  * unlocking failed as both cifs.ko and the SMB server
1786                  * remove the lock on file close
1787                  */
1788                 if (rc) {
1789                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1790                         if (!(flock->fl_flags & FL_CLOSE))
1791                                 return rc;
1792                 }
1793                 rc = locks_lock_file_wait(file, flock);
1794         }
1795         return rc;
1796 }
1797
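/*
 * Entry point for flock(2).  The VFS hands us a whole-file lock
 * (FL_FLOCK), which is pushed through the same byte-range machinery
 * as fcntl locks via cifs_setlk().
 */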
1798 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1799 {
1800         int rc, xid;
1801         int lock = 0, unlock = 0;
1802         bool wait_flag = false;
1803         bool posix_lck = false;
1804         struct cifs_sb_info *cifs_sb;
1805         struct cifs_tcon *tcon;
1806         struct cifsFileInfo *cfile;
1807         __u32 type;
1808
1809         rc = -EACCES;
1810         xid = get_xid();
1811
1812         if (!(fl->fl_flags & FL_FLOCK)) {
1813                 free_xid(xid);
1814                 return -ENOLCK;
1815         }
1816
1817         cfile = (struct cifsFileInfo *)file->private_data;
1818         tcon = tlink_tcon(cfile->tlink);
1819
1820         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1821                         tcon->ses->server);
1822         cifs_sb = CIFS_FILE_SB(file);
1823
1824         if (cap_unix(tcon->ses) &&
1825             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1826             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1827                 posix_lck = true;
1828
1829         if (!lock && !unlock) {
1830                 /*
1831                  * if no lock or unlock then nothing to do since we do not
1832                  * know what it is
1833                  */
1834                 free_xid(xid);
1835                 return -EOPNOTSUPP;
1836         }
1837
1838         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1839                         xid);
1840         free_xid(xid);
1841         return rc;
1842 }
1843
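/* Entry point for fcntl(F_GETLK/F_SETLK/F_SETLKW) byte-range locks. */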
1844 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1845 {
1846         int rc, xid;
1847         int lock = 0, unlock = 0;
1848         bool wait_flag = false;
1849         bool posix_lck = false;
1850         struct cifs_sb_info *cifs_sb;
1851         struct cifs_tcon *tcon;
1852         struct cifsFileInfo *cfile;
1853         __u32 type;
1854
1855         rc = -EACCES;
1856         xid = get_xid();
1857
1858         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1859                  cmd, flock->fl_flags, flock->fl_type,
1860                  flock->fl_start, flock->fl_end);
1861
1862         cfile = (struct cifsFileInfo *)file->private_data;
1863         tcon = tlink_tcon(cfile->tlink);
1864
1865         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1866                         tcon->ses->server);
1867         cifs_sb = CIFS_FILE_SB(file);
1868         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1869
1870         if (cap_unix(tcon->ses) &&
1871             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1872             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1873                 posix_lck = true;
1874         /*
1875          * BB add code here to normalize offset and length to account for
1876          * negative length which we can not accept over the wire.
1877          */
1878         if (IS_GETLK(cmd)) {
1879                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1880                 free_xid(xid);
1881                 return rc;
1882         }
1883
1884         if (!lock && !unlock) {
1885                 /*
1886                  * if no lock or unlock then nothing to do since we do not
1887                  * know what it is
1888                  */
1889                 free_xid(xid);
1890                 return -EOPNOTSUPP;
1891         }
1892
1893         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1894                         xid);
1895         free_xid(xid);
1896         return rc;
1897 }
1898
1899 /*
1900  * update the file size (if needed) after a write. Should be called with
1901  * the inode->i_lock held
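 *
 * Typical call site (see cifs_write() below):
 *
 *	spin_lock(&d_inode(dentry)->i_lock);
 *	cifs_update_eof(cifsi, *offset, bytes_written);
 *	spin_unlock(&d_inode(dentry)->i_lock);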
1902  */
1903 void
1904 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1905                       unsigned int bytes_written)
1906 {
1907         loff_t end_of_write = offset + bytes_written;
1908
1909         if (end_of_write > cifsi->server_eof)
1910                 cifsi->server_eof = end_of_write;
1911 }
1912
1913 static ssize_t
1914 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1915            size_t write_size, loff_t *offset)
1916 {
1917         int rc = 0;
1918         unsigned int bytes_written = 0;
1919         unsigned int total_written;
1920         struct cifs_tcon *tcon;
1921         struct TCP_Server_Info *server;
1922         unsigned int xid;
1923         struct dentry *dentry = open_file->dentry;
1924         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1925         struct cifs_io_parms io_parms = {0};
1926
1927         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1928                  write_size, *offset, dentry);
1929
1930         tcon = tlink_tcon(open_file->tlink);
1931         server = tcon->ses->server;
1932
1933         if (!server->ops->sync_write)
1934                 return -ENOSYS;
1935
1936         xid = get_xid();
1937
1938         for (total_written = 0; write_size > total_written;
1939              total_written += bytes_written) {
1940                 rc = -EAGAIN;
1941                 while (rc == -EAGAIN) {
1942                         struct kvec iov[2];
1943                         unsigned int len;
1944
1945                         if (open_file->invalidHandle) {
1946                                 /* we could deadlock if we called
1947                                    filemap_fdatawait from here so tell
1948                                    reopen_file not to flush data to
1949                                    server now */
1950                                 rc = cifs_reopen_file(open_file, false);
1951                                 if (rc != 0)
1952                                         break;
1953                         }
1954
1955                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1956                                   (unsigned int)write_size - total_written);
1957                         /* iov[0] is reserved for smb header */
1958                         iov[1].iov_base = (char *)write_data + total_written;
1959                         iov[1].iov_len = len;
1960                         io_parms.pid = pid;
1961                         io_parms.tcon = tcon;
1962                         io_parms.offset = *offset;
1963                         io_parms.length = len;
1964                         rc = server->ops->sync_write(xid, &open_file->fid,
1965                                         &io_parms, &bytes_written, iov, 1);
1966                 }
1967                 if (rc || (bytes_written == 0)) {
1968                         if (total_written)
1969                                 break;
1970                         else {
1971                                 free_xid(xid);
1972                                 return rc;
1973                         }
1974                 } else {
1975                         spin_lock(&d_inode(dentry)->i_lock);
1976                         cifs_update_eof(cifsi, *offset, bytes_written);
1977                         spin_unlock(&d_inode(dentry)->i_lock);
1978                         *offset += bytes_written;
1979                 }
1980         }
1981
1982         cifs_stats_bytes_written(tcon, total_written);
1983
1984         if (total_written > 0) {
1985                 spin_lock(&d_inode(dentry)->i_lock);
1986                 if (*offset > d_inode(dentry)->i_size) {
1987                         i_size_write(d_inode(dentry), *offset);
1988                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
1989                 }
1990                 spin_unlock(&d_inode(dentry)->i_lock);
1991         }
1992         mark_inode_dirty_sync(d_inode(dentry));
1993         free_xid(xid);
1994         return total_written;
1995 }
1996
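/*
 * Pick an open handle on this inode that is usable for reads.  The
 * returned cifsFileInfo holds a reference that the caller must drop
 * with cifsFileInfo_put(); NULL is returned if nothing suitable is
 * open.
 */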
1997 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1998                                         bool fsuid_only)
1999 {
2000         struct cifsFileInfo *open_file = NULL;
2001         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2002
2003         /* only filter by fsuid on multiuser mounts */
2004         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2005                 fsuid_only = false;
2006
2007         spin_lock(&cifs_inode->open_file_lock);
2008         /* we could simply use the first list entry since write-only entries
2009            are always at the end of the list, but the first entry might have
2010            a close pending, so we go through the whole list */
2011         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2012                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2013                         continue;
2014                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2015                         if (!open_file->invalidHandle) {
2016                                 /* found a good file */
2017                                 /* lock it so it will not be closed on us */
2018                                 cifsFileInfo_get(open_file);
2019                                 spin_unlock(&cifs_inode->open_file_lock);
2020                                 return open_file;
2021                         } /* else might as well continue, and look for
2022                              another, or simply have the caller reopen it
2023                              again rather than trying to fix this handle */
2024                 } else /* write only file */
2025                         break; /* write only files are last so must be done */
2026         }
2027         spin_unlock(&cifs_inode->open_file_lock);
2028         return NULL;
2029 }
2030
2031 /* Return 0 with *ret_file set on success, -EBADF if no handle is found, or the rc from a failed reopen */
2032 int
2033 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2034                        struct cifsFileInfo **ret_file)
2035 {
2036         struct cifsFileInfo *open_file, *inv_file = NULL;
2037         struct cifs_sb_info *cifs_sb;
2038         bool any_available = false;
2039         int rc = -EBADF;
2040         unsigned int refind = 0;
2041         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2042         bool with_delete = flags & FIND_WR_WITH_DELETE;
2043         *ret_file = NULL;
2044
2045         /*
2046          * Having a null inode here (because mapping->host was set to zero by
2047  * the VFS or MM) should not happen but we had reports of an oops (due
2048          * to it being zero) during stress testcases so we need to check for it
2049          */
2050
2051         if (cifs_inode == NULL) {
2052                 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2053                 dump_stack();
2054                 return rc;
2055         }
2056
2057         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2058
2059         /* only filter by fsuid on multiuser mounts */
2060         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2061                 fsuid_only = false;
2062
2063         spin_lock(&cifs_inode->open_file_lock);
2064 refind_writable:
2065         if (refind > MAX_REOPEN_ATT) {
2066                 spin_unlock(&cifs_inode->open_file_lock);
2067                 return rc;
2068         }
2069         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2070                 if (!any_available && open_file->pid != current->tgid)
2071                         continue;
2072                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2073                         continue;
2074                 if (with_delete && !(open_file->fid.access & DELETE))
2075                         continue;
2076                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2077                         if (!open_file->invalidHandle) {
2078                                 /* found a good writable file */
2079                                 cifsFileInfo_get(open_file);
2080                                 spin_unlock(&cifs_inode->open_file_lock);
2081                                 *ret_file = open_file;
2082                                 return 0;
2083                         } else {
2084                                 if (!inv_file)
2085                                         inv_file = open_file;
2086                         }
2087                 }
2088         }
2089         /* couldn't find usable FH with same pid, try any available */
2090         if (!any_available) {
2091                 any_available = true;
2092                 goto refind_writable;
2093         }
2094
2095         if (inv_file) {
2096                 any_available = false;
2097                 cifsFileInfo_get(inv_file);
2098         }
2099
2100         spin_unlock(&cifs_inode->open_file_lock);
2101
2102         if (inv_file) {
2103                 rc = cifs_reopen_file(inv_file, false);
2104                 if (!rc) {
2105                         *ret_file = inv_file;
2106                         return 0;
2107                 }
2108
2109                 spin_lock(&cifs_inode->open_file_lock);
2110                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2111                 spin_unlock(&cifs_inode->open_file_lock);
2112                 cifsFileInfo_put(inv_file);
2113                 ++refind;
2114                 inv_file = NULL;
2115                 spin_lock(&cifs_inode->open_file_lock);
2116                 goto refind_writable;
2117         }
2118
2119         return rc;
2120 }
2121
2122 struct cifsFileInfo *
2123 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2124 {
2125         struct cifsFileInfo *cfile;
2126         int rc;
2127
2128         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2129         if (rc)
2130                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2131
2132         return cfile;
2133 }
2134
2135 int
2136 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2137                        int flags,
2138                        struct cifsFileInfo **ret_file)
2139 {
2140         struct cifsFileInfo *cfile;
2141         void *page = alloc_dentry_path();
2142
2143         *ret_file = NULL;
2144
2145         spin_lock(&tcon->open_file_lock);
2146         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2147                 struct cifsInodeInfo *cinode;
2148                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2149                 if (IS_ERR(full_path)) {
2150                         spin_unlock(&tcon->open_file_lock);
2151                         free_dentry_path(page);
2152                         return PTR_ERR(full_path);
2153                 }
2154                 if (strcmp(full_path, name))
2155                         continue;
2156
2157                 cinode = CIFS_I(d_inode(cfile->dentry));
2158                 spin_unlock(&tcon->open_file_lock);
2159                 free_dentry_path(page);
2160                 return cifs_get_writable_file(cinode, flags, ret_file);
2161         }
2162
2163         spin_unlock(&tcon->open_file_lock);
2164         free_dentry_path(page);
2165         return -ENOENT;
2166 }
2167
2168 int
2169 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2170                        struct cifsFileInfo **ret_file)
2171 {
2172         struct cifsFileInfo *cfile;
2173         void *page = alloc_dentry_path();
2174
2175         *ret_file = NULL;
2176
2177         spin_lock(&tcon->open_file_lock);
2178         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2179                 struct cifsInodeInfo *cinode;
2180                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2181                 if (IS_ERR(full_path)) {
2182                         spin_unlock(&tcon->open_file_lock);
2183                         free_dentry_path(page);
2184                         return PTR_ERR(full_path);
2185                 }
2186                 if (strcmp(full_path, name))
2187                         continue;
2188
2189                 cinode = CIFS_I(d_inode(cfile->dentry));
2190                 spin_unlock(&tcon->open_file_lock);
2191                 free_dentry_path(page);
2192                 *ret_file = find_readable_file(cinode, 0);
2193                 return *ret_file ? 0 : -ENOENT;
2194         }
2195
2196         spin_unlock(&tcon->open_file_lock);
2197         free_dentry_path(page);
2198         return -ENOENT;
2199 }
2200
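/*
 * Write the byte range [from, to) of a page cache page back to the
 * server through any writable handle on the inode, clamping the range
 * so a racing truncate cannot make us extend the file.  Called from
 * cifs_writepage_locked() below.
 */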
2201 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2202 {
2203         struct address_space *mapping = page->mapping;
2204         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2205         char *write_data;
2206         int rc = -EFAULT;
2207         int bytes_written = 0;
2208         struct inode *inode;
2209         struct cifsFileInfo *open_file;
2210
2211         if (!mapping || !mapping->host)
2212                 return -EFAULT;
2213
2214         inode = page->mapping->host;
2215
2216         offset += (loff_t)from;
2217         write_data = kmap(page);
2218         write_data += from;
2219
2220         if ((to > PAGE_SIZE) || (from > to)) {
2221                 kunmap(page);
2222                 return -EIO;
2223         }
2224
2225         /* racing with truncate? */
2226         if (offset > mapping->host->i_size) {
2227                 kunmap(page);
2228                 return 0; /* don't care */
2229         }
2230
2231         /* check to make sure that we are not extending the file */
2232         if (mapping->host->i_size - offset < (loff_t)to)
2233                 to = (unsigned)(mapping->host->i_size - offset);
2234
2235         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2236                                     &open_file);
2237         if (!rc) {
2238                 bytes_written = cifs_write(open_file, open_file->pid,
2239                                            write_data, to - from, &offset);
2240                 cifsFileInfo_put(open_file);
2241                 /* Does mm or vfs already set times? */
2242                 inode->i_atime = inode->i_mtime = current_time(inode);
2243                 if ((bytes_written > 0) && (offset))
2244                         rc = 0;
2245                 else if (bytes_written < 0)
2246                         rc = bytes_written;
2247                 else
2248                         rc = -EFAULT;
2249         } else {
2250                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2251                 if (!is_retryable_error(rc))
2252                         rc = -EIO;
2253         }
2254
2255         kunmap(page);
2256         return rc;
2257 }
2258
2259 static struct cifs_writedata *
2260 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2261                           pgoff_t end, pgoff_t *index,
2262                           unsigned int *found_pages)
2263 {
2264         struct cifs_writedata *wdata;
2265
2266         wdata = cifs_writedata_alloc((unsigned int)tofind,
2267                                      cifs_writev_complete);
2268         if (!wdata)
2269                 return NULL;
2270
2271         *found_pages = find_get_pages_range_tag(mapping, index, end,
2272                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2273         return wdata;
2274 }
2275
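/*
 * Lock and claim a contiguous run of the dirty pages found above,
 * much as write_cache_pages() would: stop at the first page that was
 * truncated or redirtied, is out of range, is not the next expected
 * index, or lies beyond EOF.  Pages we keep are marked for writeback;
 * the rest are released.
 */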
2276 static unsigned int
2277 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2278                     struct address_space *mapping,
2279                     struct writeback_control *wbc,
2280                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2281 {
2282         unsigned int nr_pages = 0, i;
2283         struct page *page;
2284
2285         for (i = 0; i < found_pages; i++) {
2286                 page = wdata->pages[i];
2287                 /*
2288                  * At this point we hold neither the i_pages lock nor the
2289                  * page lock: the page may be truncated or invalidated
2290                  * (changing page->mapping to NULL), or even swizzled
2291                  * back from swapper_space to tmpfs file mapping
2292                  */
2293
2294                 if (nr_pages == 0)
2295                         lock_page(page);
2296                 else if (!trylock_page(page))
2297                         break;
2298
2299                 if (unlikely(page->mapping != mapping)) {
2300                         unlock_page(page);
2301                         break;
2302                 }
2303
2304                 if (!wbc->range_cyclic && page->index > end) {
2305                         *done = true;
2306                         unlock_page(page);
2307                         break;
2308                 }
2309
2310                 if (*next && (page->index != *next)) {
2311                         /* Not next consecutive page */
2312                         unlock_page(page);
2313                         break;
2314                 }
2315
2316                 if (wbc->sync_mode != WB_SYNC_NONE)
2317                         wait_on_page_writeback(page);
2318
2319                 if (PageWriteback(page) ||
2320                                 !clear_page_dirty_for_io(page)) {
2321                         unlock_page(page);
2322                         break;
2323                 }
2324
2325                 /*
2326                  * This actually clears the dirty bit in the radix tree.
2327                  * See cifs_writepage() for more commentary.
2328                  */
2329                 set_page_writeback(page);
2330                 if (page_offset(page) >= i_size_read(mapping->host)) {
2331                         *done = true;
2332                         unlock_page(page);
2333                         end_page_writeback(page);
2334                         break;
2335                 }
2336
2337                 wdata->pages[i] = page;
2338                 *next = page->index + 1;
2339                 ++nr_pages;
2340         }
2341
2342         /* reset index to refind any pages skipped */
2343         if (nr_pages == 0)
2344                 *index = wdata->pages[0]->index + 1;
2345
2346         /* put any pages we aren't going to use */
2347         for (i = nr_pages; i < found_pages; i++) {
2348                 put_page(wdata->pages[i]);
2349                 wdata->pages[i] = NULL;
2350         }
2351
2352         return nr_pages;
2353 }
2354
2355 static int
2356 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2357                  struct address_space *mapping, struct writeback_control *wbc)
2358 {
2359         int rc;
2360
2361         wdata->sync_mode = wbc->sync_mode;
2362         wdata->nr_pages = nr_pages;
2363         wdata->offset = page_offset(wdata->pages[0]);
2364         wdata->pagesz = PAGE_SIZE;
2365         wdata->tailsz = min(i_size_read(mapping->host) -
2366                         page_offset(wdata->pages[nr_pages - 1]),
2367                         (loff_t)PAGE_SIZE);
2368         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2369         wdata->pid = wdata->cfile->pid;
2370
2371         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2372         if (rc)
2373                 return rc;
2374
2375         if (wdata->cfile->invalidHandle)
2376                 rc = -EAGAIN;
2377         else
2378                 rc = wdata->server->ops->async_writev(wdata,
2379                                                       cifs_writedata_release);
2380
2381         return rc;
2382 }
2383
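/*
 * Write back dirty pages for the whole mapping.  Dirty pages are
 * gathered into runs of up to wsize bytes and handed to the server's
 * async write op; send credits are obtained before each batch and
 * returned if the batch is abandoned.
 */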
2384 static int cifs_writepages(struct address_space *mapping,
2385                            struct writeback_control *wbc)
2386 {
2387         struct inode *inode = mapping->host;
2388         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2389         struct TCP_Server_Info *server;
2390         bool done = false, scanned = false, range_whole = false;
2391         pgoff_t end, index;
2392         struct cifs_writedata *wdata;
2393         struct cifsFileInfo *cfile = NULL;
2394         int rc = 0;
2395         int saved_rc = 0;
2396         unsigned int xid;
2397
2398         /*
2399          * If wsize is smaller than the page cache size, default to writing
2400          * one page at a time via cifs_writepage
2401          */
2402         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2403                 return generic_writepages(mapping, wbc);
2404
2405         xid = get_xid();
2406         if (wbc->range_cyclic) {
2407                 index = mapping->writeback_index; /* Start from prev offset */
2408                 end = -1;
2409         } else {
2410                 index = wbc->range_start >> PAGE_SHIFT;
2411                 end = wbc->range_end >> PAGE_SHIFT;
2412                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2413                         range_whole = true;
2414                 scanned = true;
2415         }
2416         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2417
2418 retry:
2419         while (!done && index <= end) {
2420                 unsigned int i, nr_pages, found_pages, wsize;
2421                 pgoff_t next = 0, tofind, saved_index = index;
2422                 struct cifs_credits credits_on_stack;
2423                 struct cifs_credits *credits = &credits_on_stack;
2424                 int get_file_rc = 0;
2425
2426                 if (cfile)
2427                         cifsFileInfo_put(cfile);
2428
2429                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2430
2431                 /* in case of an error store it to return later */
2432                 if (rc)
2433                         get_file_rc = rc;
2434
2435                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2436                                                    &wsize, credits);
2437                 if (rc != 0) {
2438                         done = true;
2439                         break;
2440                 }
2441
2442                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2443
2444                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2445                                                   &found_pages);
2446                 if (!wdata) {
2447                         rc = -ENOMEM;
2448                         done = true;
2449                         add_credits_and_wake_if(server, credits, 0);
2450                         break;
2451                 }
2452
2453                 if (found_pages == 0) {
2454                         kref_put(&wdata->refcount, cifs_writedata_release);
2455                         add_credits_and_wake_if(server, credits, 0);
2456                         break;
2457                 }
2458
2459                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2460                                                end, &index, &next, &done);
2461
2462                 /* nothing to write? */
2463                 if (nr_pages == 0) {
2464                         kref_put(&wdata->refcount, cifs_writedata_release);
2465                         add_credits_and_wake_if(server, credits, 0);
2466                         continue;
2467                 }
2468
2469                 wdata->credits = credits_on_stack;
2470                 wdata->cfile = cfile;
2471                 wdata->server = server;
2472                 cfile = NULL;
2473
2474                 if (!wdata->cfile) {
2475                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2476                                  get_file_rc);
2477                         if (is_retryable_error(get_file_rc))
2478                                 rc = get_file_rc;
2479                         else
2480                                 rc = -EBADF;
2481                 } else
2482                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2483
2484                 for (i = 0; i < nr_pages; ++i)
2485                         unlock_page(wdata->pages[i]);
2486
2487                 /* send failure -- clean up the mess */
2488                 if (rc != 0) {
2489                         add_credits_and_wake_if(server, &wdata->credits, 0);
2490                         for (i = 0; i < nr_pages; ++i) {
2491                                 if (is_retryable_error(rc))
2492                                         redirty_page_for_writepage(wbc,
2493                                                            wdata->pages[i]);
2494                                 else
2495                                         SetPageError(wdata->pages[i]);
2496                                 end_page_writeback(wdata->pages[i]);
2497                                 put_page(wdata->pages[i]);
2498                         }
2499                         if (!is_retryable_error(rc))
2500                                 mapping_set_error(mapping, rc);
2501                 }
2502                 kref_put(&wdata->refcount, cifs_writedata_release);
2503
2504                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2505                         index = saved_index;
2506                         continue;
2507                 }
2508
2509                 /* Return immediately if we received a signal during writing */
2510                 if (is_interrupt_error(rc)) {
2511                         done = true;
2512                         break;
2513                 }
2514
2515                 if (rc != 0 && saved_rc == 0)
2516                         saved_rc = rc;
2517
2518                 wbc->nr_to_write -= nr_pages;
2519                 if (wbc->nr_to_write <= 0)
2520                         done = true;
2521
2522                 index = next;
2523         }
2524
2525         if (!scanned && !done) {
2526                 /*
2527                  * We hit the last page and there is more work to be done: wrap
2528                  * back to the start of the file
2529                  */
2530                 scanned = true;
2531                 index = 0;
2532                 goto retry;
2533         }
2534
2535         if (saved_rc != 0)
2536                 rc = saved_rc;
2537
2538         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2539                 mapping->writeback_index = index;
2540
2541         if (cfile)
2542                 cifsFileInfo_put(cfile);
2543         free_xid(xid);
2544         /* Indication to update ctime and mtime as close is deferred */
2545         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2546         return rc;
2547 }
2548
2549 static int
2550 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2551 {
2552         int rc;
2553         unsigned int xid;
2554
2555         xid = get_xid();
2556 /* BB add check for wbc flags */
2557         get_page(page);
2558         if (!PageUptodate(page))
2559                 cifs_dbg(FYI, "ppw - page not up to date\n");
2560
2561         /*
2562          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2563          *
2564          * A writepage() implementation always needs to do either this,
2565          * or re-dirty the page with "redirty_page_for_writepage()" in
2566          * the case of a failure.
2567          *
2568          * Just unlocking the page will cause the radix tree tag-bits
2569          * to fail to update with the state of the page correctly.
2570          */
2571         set_page_writeback(page);
2572 retry_write:
2573         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2574         if (is_retryable_error(rc)) {
2575                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2576                         goto retry_write;
2577                 redirty_page_for_writepage(wbc, page);
2578         } else if (rc != 0) {
2579                 SetPageError(page);
2580                 mapping_set_error(page->mapping, rc);
2581         } else {
2582                 SetPageUptodate(page);
2583         }
2584         end_page_writeback(page);
2585         put_page(page);
2586         free_xid(xid);
2587         return rc;
2588 }
2589
2590 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2591 {
2592         int rc = cifs_writepage_locked(page, wbc);
2593         unlock_page(page);
2594         return rc;
2595 }
2596
2597 static int cifs_write_end(struct file *file, struct address_space *mapping,
2598                         loff_t pos, unsigned len, unsigned copied,
2599                         struct page *page, void *fsdata)
2600 {
2601         int rc;
2602         struct inode *inode = mapping->host;
2603         struct cifsFileInfo *cfile = file->private_data;
2604         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2605         __u32 pid;
2606
2607         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2608                 pid = cfile->pid;
2609         else
2610                 pid = current->tgid;
2611
2612         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2613                  page, pos, copied);
2614
2615         if (PageChecked(page)) {
2616                 if (copied == len)
2617                         SetPageUptodate(page);
2618                 ClearPageChecked(page);
2619         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2620                 SetPageUptodate(page);
2621
2622         if (!PageUptodate(page)) {
2623                 char *page_data;
2624                 unsigned offset = pos & (PAGE_SIZE - 1);
2625                 unsigned int xid;
2626
2627                 xid = get_xid();
2628                 /* this is probably better than directly calling
2629                    cifs_partialpagewrite since in this function the file
2630                    handle is known, which we might as well leverage */
2631                 /* BB check if anything else missing out of ppw
2632                    such as updating last write time */
2633                 page_data = kmap(page);
2634                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2635                 /* if (rc < 0) should we set writebehind rc? */
2636                 kunmap(page);
2637
2638                 free_xid(xid);
2639         } else {
2640                 rc = copied;
2641                 pos += copied;
2642                 set_page_dirty(page);
2643         }
2644
2645         if (rc > 0) {
2646                 spin_lock(&inode->i_lock);
2647                 if (pos > inode->i_size) {
2648                         i_size_write(inode, pos);
2649                         inode->i_blocks = (512 - 1 + pos) >> 9;
2650                 }
2651                 spin_unlock(&inode->i_lock);
2652         }
2653
2654         unlock_page(page);
2655         put_page(page);
2656         /* Indication to update ctime and mtime as close is deferred */
2657         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2658
2659         return rc;
2660 }
2661
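/*
 * fsync for "strict cache" mounts: besides flushing dirty pages and
 * asking the server to flush, invalidate the page cache when we do not
 * hold a read oplock/lease, so that subsequent reads go to the server.
 */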
2662 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2663                       int datasync)
2664 {
2665         unsigned int xid;
2666         int rc = 0;
2667         struct cifs_tcon *tcon;
2668         struct TCP_Server_Info *server;
2669         struct cifsFileInfo *smbfile = file->private_data;
2670         struct inode *inode = file_inode(file);
2671         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2672
2673         rc = file_write_and_wait_range(file, start, end);
2674         if (rc) {
2675                 trace_cifs_fsync_err(inode->i_ino, rc);
2676                 return rc;
2677         }
2678
2679         xid = get_xid();
2680
2681         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2682                  file, datasync);
2683
2684         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2685                 rc = cifs_zap_mapping(inode);
2686                 if (rc) {
2687                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2688                         rc = 0; /* don't care about it in fsync */
2689                 }
2690         }
2691
2692         tcon = tlink_tcon(smbfile->tlink);
2693         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2694                 server = tcon->ses->server;
2695                 if (server->ops->flush == NULL) {
2696                         rc = -ENOSYS;
2697                         goto strict_fsync_exit;
2698                 }
2699
2700                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2701                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2702                         if (smbfile) {
2703                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2704                                 cifsFileInfo_put(smbfile);
2705                         } else
2706                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2707                 } else
2708                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2709         }
2710
2711 strict_fsync_exit:
2712         free_xid(xid);
2713         return rc;
2714 }
2715
2716 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2717 {
2718         unsigned int xid;
2719         int rc = 0;
2720         struct cifs_tcon *tcon;
2721         struct TCP_Server_Info *server;
2722         struct cifsFileInfo *smbfile = file->private_data;
2723         struct inode *inode = file_inode(file);
2724         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2725
2726         rc = file_write_and_wait_range(file, start, end);
2727         if (rc) {
2728                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2729                 return rc;
2730         }
2731
2732         xid = get_xid();
2733
2734         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2735                  file, datasync);
2736
2737         tcon = tlink_tcon(smbfile->tlink);
2738         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2739                 server = tcon->ses->server;
2740                 if (server->ops->flush == NULL) {
2741                         rc = -ENOSYS;
2742                         goto fsync_exit;
2743                 }
2744
2745                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2746                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2747                         if (smbfile) {
2748                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2749                                 cifsFileInfo_put(smbfile);
2750                         } else
2751                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2752                 } else
2753                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2754         }
2755
2756 fsync_exit:
2757         free_xid(xid);
2758         return rc;
2759 }
2760
2761 /*
2762  * As file closes, flush all cached write data for this inode checking
2763  * for write behind errors.
2764  */
2765 int cifs_flush(struct file *file, fl_owner_t id)
2766 {
2767         struct inode *inode = file_inode(file);
2768         int rc = 0;
2769
2770         if (file->f_mode & FMODE_WRITE)
2771                 rc = filemap_write_and_wait(inode->i_mapping);
2772
2773         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2774         if (rc)
2775                 trace_cifs_flush_err(inode->i_ino, rc);
2776         return rc;
2777 }
2778
2779 static int
2780 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2781 {
2782         int rc = 0;
2783         unsigned long i;
2784
2785         for (i = 0; i < num_pages; i++) {
2786                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2787                 if (!pages[i]) {
2788                         /*
2789                          * Save the number of pages we have already allocated
2790                          * and return with an ENOMEM error.
2791                          */
2792                         num_pages = i;
2793                         rc = -ENOMEM;
2794                         break;
2795                 }
2796         }
2797
2798         if (rc) {
2799                 for (i = 0; i < num_pages; i++)
2800                         put_page(pages[i]);
2801         }
2802         return rc;
2803 }
2804
2805 static inline
2806 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2807 {
2808         size_t num_pages;
2809         size_t clen;
2810
2811         clen = min_t(const size_t, len, wsize);
2812         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2813
2814         if (cur_len)
2815                 *cur_len = clen;
2816
2817         return num_pages;
2818 }
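/*
 * Worked example (assuming 4 KiB pages): wsize = 65536 and len = 100000
 * give clen = 65536 and num_pages = DIV_ROUND_UP(65536, 4096) = 16;
 * wsize = 65536 and len = 5000 give clen = 5000 and num_pages = 2
 * (one full page plus a 904-byte tail).
 */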
2819
2820 static void
2821 cifs_uncached_writedata_release(struct kref *refcount)
2822 {
2823         int i;
2824         struct cifs_writedata *wdata = container_of(refcount,
2825                                         struct cifs_writedata, refcount);
2826
2827         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2828         for (i = 0; i < wdata->nr_pages; i++)
2829                 put_page(wdata->pages[i]);
2830         cifs_writedata_release(refcount);
2831 }
2832
2833 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2834
2835 static void
2836 cifs_uncached_writev_complete(struct work_struct *work)
2837 {
2838         struct cifs_writedata *wdata = container_of(work,
2839                                         struct cifs_writedata, work);
2840         struct inode *inode = d_inode(wdata->cfile->dentry);
2841         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2842
2843         spin_lock(&inode->i_lock);
2844         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2845         if (cifsi->server_eof > inode->i_size)
2846                 i_size_write(inode, cifsi->server_eof);
2847         spin_unlock(&inode->i_lock);
2848
2849         complete(&wdata->done);
2850         collect_uncached_write_data(wdata->ctx);
2851         /* the call below may free the last ref to the aio ctx */
2852         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2853 }
2854
2855 static int
2856 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2857                       size_t *len, unsigned long *num_pages)
2858 {
2859         size_t save_len, copied, bytes, cur_len = *len;
2860         unsigned long i, nr_pages = *num_pages;
2861
2862         save_len = cur_len;
2863         for (i = 0; i < nr_pages; i++) {
2864                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2865                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2866                 cur_len -= copied;
2867                 /*
2868                  * If we didn't copy as much as we expected, then that
2869                  * may mean we trod into an unmapped area. Stop copying
2870                  * at that point. On the next pass through the big
2871                  * loop, we'll likely end up getting a zero-length
2872                  * write and bailing out of it.
2873                  */
2874                 if (copied < bytes)
2875                         break;
2876         }
2877         cur_len = save_len - cur_len;
2878         *len = cur_len;
2879
2880         /*
2881          * If we have no data to send, then that probably means that
2882          * the copy above failed altogether. That's most likely because
2883          * the address in the iovec was bogus. Return -EFAULT and let
2884          * the caller free anything we allocated and bail out.
2885          */
2886         if (!cur_len)
2887                 return -EFAULT;
2888
2889         /*
2890          * i + 1 now represents the number of pages we actually used in
2891          * the copy phase above.
2892          */
2893         *num_pages = i + 1;
2894         return 0;
2895 }
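/*
 * Example of the short-copy handling above (assuming 4 KiB pages): for
 * *len = 10000 and *num_pages = 3, pages 0 and 1 take 4096 bytes each
 * and page 2 takes the 1808-byte tail. If the copy into page 1 comes up
 * short (say, an unmapped user address), the loop stops there, *len
 * reports only the bytes actually copied and *num_pages becomes 2.
 */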
2896
2897 static int
2898 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2899         struct cifs_aio_ctx *ctx)
2900 {
2901         unsigned int wsize;
2902         struct cifs_credits credits;
2903         int rc;
2904         struct TCP_Server_Info *server = wdata->server;
2905
2906         do {
2907                 if (wdata->cfile->invalidHandle) {
2908                         rc = cifs_reopen_file(wdata->cfile, false);
2909                         if (rc == -EAGAIN)
2910                                 continue;
2911                         else if (rc)
2912                                 break;
2913                 }
2914
2915
2916                 /*
2917                  * Wait for credits to resend this wdata.
2918                  * Note: we attempt to resend the whole wdata, not in
2919                  * smaller segments.
2920                  */
2921                 do {
2922                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2923                                                 &wsize, &credits);
2924                         if (rc)
2925                                 goto fail;
2926
2927                         if (wsize < wdata->bytes) {
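                                /*
                                 * The server granted fewer credits than
                                 * this wdata needs; return them and retry
                                 * in a second rather than splitting the
                                 * resend into smaller segments.
                                 */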
2928                                 add_credits_and_wake_if(server, &credits, 0);
2929                                 msleep(1000);
2930                         }
2931                 } while (wsize < wdata->bytes);
2932                 wdata->credits = credits;
2933
2934                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2935
2936                 if (!rc) {
2937                         if (wdata->cfile->invalidHandle)
2938                                 rc = -EAGAIN;
2939                         else {
2940 #ifdef CONFIG_CIFS_SMB_DIRECT
2941                                 if (wdata->mr) {
2942                                         wdata->mr->need_invalidate = true;
2943                                         smbd_deregister_mr(wdata->mr);
2944                                         wdata->mr = NULL;
2945                                 }
2946 #endif
2947                                 rc = server->ops->async_writev(wdata,
2948                                         cifs_uncached_writedata_release);
2949                         }
2950                 }
2951
2952                 /* If the write was successfully sent, we are done */
2953                 if (!rc) {
2954                         list_add_tail(&wdata->list, wdata_list);
2955                         return 0;
2956                 }
2957
2958                 /* Roll back credits and retry if needed */
2959                 add_credits_and_wake_if(server, &wdata->credits, 0);
2960         } while (rc == -EAGAIN);
2961
2962 fail:
2963         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2964         return rc;
2965 }
2966
2967 static int
2968 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2969                      struct cifsFileInfo *open_file,
2970                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2971                      struct cifs_aio_ctx *ctx)
2972 {
2973         int rc = 0;
2974         size_t cur_len;
2975         unsigned long nr_pages, num_pages, i;
2976         struct cifs_writedata *wdata;
2977         struct iov_iter saved_from = *from;
2978         loff_t saved_offset = offset;
2979         pid_t pid;
2980         struct TCP_Server_Info *server;
2981         struct page **pagevec;
2982         size_t start;
2983         unsigned int xid;
2984
2985         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2986                 pid = open_file->pid;
2987         else
2988                 pid = current->tgid;
2989
2990         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2991         xid = get_xid();
2992
2993         do {
2994                 unsigned int wsize;
2995                 struct cifs_credits credits_on_stack;
2996                 struct cifs_credits *credits = &credits_on_stack;
2997
2998                 if (open_file->invalidHandle) {
2999                         rc = cifs_reopen_file(open_file, false);
3000                         if (rc == -EAGAIN)
3001                                 continue;
3002                         else if (rc)
3003                                 break;
3004                 }
3005
3006                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3007                                                    &wsize, credits);
3008                 if (rc)
3009                         break;
3010
3011                 cur_len = min_t(const size_t, len, wsize);
3012
3013                 if (ctx->direct_io) {
3014                         ssize_t result;
3015
3016                         result = iov_iter_get_pages_alloc(
3017                                 from, &pagevec, cur_len, &start);
3018                         if (result < 0) {
3019                                 cifs_dbg(VFS,
3020                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3021                                          result, iov_iter_type(from),
3022                                          from->iov_offset, from->count);
3023                                 dump_stack();
3024
3025                                 rc = result;
3026                                 add_credits_and_wake_if(server, credits, 0);
3027                                 break;
3028                         }
3029                         cur_len = (size_t)result;
3030                         iov_iter_advance(from, cur_len);
3031
3032                         nr_pages =
3033                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3034
3035                         wdata = cifs_writedata_direct_alloc(pagevec,
3036                                              cifs_uncached_writev_complete);
3037                         if (!wdata) {
3038                                 rc = -ENOMEM;
3039                                 add_credits_and_wake_if(server, credits, 0);
3040                                 break;
3041                         }
3042
3043
3044                         wdata->page_offset = start;
3045                         wdata->tailsz =
3046                                 nr_pages > 1 ?
3047                                         cur_len - (PAGE_SIZE - start) -
3048                                         (nr_pages - 2) * PAGE_SIZE :
3049                                         cur_len;
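                        /*
                         * Worked example of the tail-size math (assuming
                         * 4 KiB pages): start = 1024 and cur_len = 10000
                         * give nr_pages = (10000 + 1024 + 4095) / 4096 = 3;
                         * page 0 carries 4096 - 1024 = 3072 bytes, page 1
                         * a full 4096, and the tail page holds
                         * tailsz = 10000 - 3072 - 4096 = 2832 bytes.
                         */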
3050                 } else {
3051                         nr_pages = get_numpages(wsize, len, &cur_len);
3052                         wdata = cifs_writedata_alloc(nr_pages,
3053                                              cifs_uncached_writev_complete);
3054                         if (!wdata) {
3055                                 rc = -ENOMEM;
3056                                 add_credits_and_wake_if(server, credits, 0);
3057                                 break;
3058                         }
3059
3060                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3061                         if (rc) {
3062                                 kvfree(wdata->pages);
3063                                 kfree(wdata);
3064                                 add_credits_and_wake_if(server, credits, 0);
3065                                 break;
3066                         }
3067
3068                         num_pages = nr_pages;
3069                         rc = wdata_fill_from_iovec(
3070                                 wdata, from, &cur_len, &num_pages);
3071                         if (rc) {
3072                                 for (i = 0; i < nr_pages; i++)
3073                                         put_page(wdata->pages[i]);
3074                                 kvfree(wdata->pages);
3075                                 kfree(wdata);
3076                                 add_credits_and_wake_if(server, credits, 0);
3077                                 break;
3078                         }
3079
3080                         /*
3081                          * Bring nr_pages down to the number of pages we
3082                          * actually used, and free any pages that we didn't use.
3083                          */
3084                         for ( ; nr_pages > num_pages; nr_pages--)
3085                                 put_page(wdata->pages[nr_pages - 1]);
3086
3087                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3088                 }
3089
3090                 wdata->sync_mode = WB_SYNC_ALL;
3091                 wdata->nr_pages = nr_pages;
3092                 wdata->offset = (__u64)offset;
3093                 wdata->cfile = cifsFileInfo_get(open_file);
3094                 wdata->server = server;
3095                 wdata->pid = pid;
3096                 wdata->bytes = cur_len;
3097                 wdata->pagesz = PAGE_SIZE;
3098                 wdata->credits = credits_on_stack;
3099                 wdata->ctx = ctx;
3100                 kref_get(&ctx->refcount);
3101
3102                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3103
3104                 if (!rc) {
3105                         if (wdata->cfile->invalidHandle)
3106                                 rc = -EAGAIN;
3107                         else
3108                                 rc = server->ops->async_writev(wdata,
3109                                         cifs_uncached_writedata_release);
3110                 }
3111
3112                 if (rc) {
3113                         add_credits_and_wake_if(server, &wdata->credits, 0);
3114                         kref_put(&wdata->refcount,
3115                                  cifs_uncached_writedata_release);
3116                         if (rc == -EAGAIN) {
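                                /*
                                 * Rewind the iterator to where this call
                                 * started and re-advance it to the current
                                 * offset, so the retried pass re-copies
                                 * the same user data for this chunk.
                                 */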
3117                                 *from = saved_from;
3118                                 iov_iter_advance(from, offset - saved_offset);
3119                                 continue;
3120                         }
3121                         break;
3122                 }
3123
3124                 list_add_tail(&wdata->list, wdata_list);
3125                 offset += cur_len;
3126                 len -= cur_len;
3127         } while (len > 0);
3128
3129         free_xid(xid);
3130         return rc;
3131 }
3132
3133 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3134 {
3135         struct cifs_writedata *wdata, *tmp;
3136         struct cifs_tcon *tcon;
3137         struct cifs_sb_info *cifs_sb;
3138         struct dentry *dentry = ctx->cfile->dentry;
3139         ssize_t rc;
3140
3141         tcon = tlink_tcon(ctx->cfile->tlink);
3142         cifs_sb = CIFS_SB(dentry->d_sb);
3143
3144         mutex_lock(&ctx->aio_mutex);
3145
3146         if (list_empty(&ctx->list)) {
3147                 mutex_unlock(&ctx->aio_mutex);
3148                 return;
3149         }
3150
3151         rc = ctx->rc;
3152         /*
3153          * Wait for and collect replies for any successful sends in order of
3154          * increasing offset. Once an error is hit, then return without waiting
3155          * for any more replies.
3156          */
3157 restart_loop:
3158         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3159                 if (!rc) {
3160                         if (!try_wait_for_completion(&wdata->done)) {
3161                                 mutex_unlock(&ctx->aio_mutex);
3162                                 return;
3163                         }
3164
3165                         if (wdata->result)
3166                                 rc = wdata->result;
3167                         else
3168                                 ctx->total_len += wdata->bytes;
3169
3170                         /* resend call if it's a retryable error */
3171                         if (rc == -EAGAIN) {
3172                                 struct list_head tmp_list;
3173                                 struct iov_iter tmp_from = ctx->iter;
3174
3175                                 INIT_LIST_HEAD(&tmp_list);
3176                                 list_del_init(&wdata->list);
3177
3178                                 if (ctx->direct_io)
3179                                         rc = cifs_resend_wdata(
3180                                                 wdata, &tmp_list, ctx);
3181                                 else {
3182                                         iov_iter_advance(&tmp_from,
3183                                                  wdata->offset - ctx->pos);
3184
3185                                         rc = cifs_write_from_iter(wdata->offset,
3186                                                 wdata->bytes, &tmp_from,
3187                                                 ctx->cfile, cifs_sb, &tmp_list,
3188                                                 ctx);
3189
3190                                         kref_put(&wdata->refcount,
3191                                                 cifs_uncached_writedata_release);
3192                                 }
3193
3194                                 list_splice(&tmp_list, &ctx->list);
3195                                 goto restart_loop;
3196                         }
3197                 }
3198                 list_del_init(&wdata->list);
3199                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3200         }
3201
3202         cifs_stats_bytes_written(tcon, ctx->total_len);
3203         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3204
3205         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3206
3207         mutex_unlock(&ctx->aio_mutex);
3208
3209         if (ctx->iocb && ctx->iocb->ki_complete)
3210                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3211         else
3212                 complete(&ctx->done);
3213 }
3214
3215 static ssize_t __cifs_writev(
3216         struct kiocb *iocb, struct iov_iter *from, bool direct)
3217 {
3218         struct file *file = iocb->ki_filp;
3219         ssize_t total_written = 0;
3220         struct cifsFileInfo *cfile;
3221         struct cifs_tcon *tcon;
3222         struct cifs_sb_info *cifs_sb;
3223         struct cifs_aio_ctx *ctx;
3224         struct iov_iter saved_from = *from;
3225         size_t len = iov_iter_count(from);
3226         int rc;
3227
3228         /*
3229          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3230          * In this case, fall back to the non-direct write function.
3231          * This could be improved by getting pages directly from the ITER_KVEC.
3232          */
3233         if (direct && iov_iter_is_kvec(from)) {
3234                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3235                 direct = false;
3236         }
3237
3238         rc = generic_write_checks(iocb, from);
3239         if (rc <= 0)
3240                 return rc;
3241
3242         cifs_sb = CIFS_FILE_SB(file);
3243         cfile = file->private_data;
3244         tcon = tlink_tcon(cfile->tlink);
3245
3246         if (!tcon->ses->server->ops->async_writev)
3247                 return -ENOSYS;
3248
3249         ctx = cifs_aio_ctx_alloc();
3250         if (!ctx)
3251                 return -ENOMEM;
3252
3253         ctx->cfile = cifsFileInfo_get(cfile);
3254
3255         if (!is_sync_kiocb(iocb))
3256                 ctx->iocb = iocb;
3257
3258         ctx->pos = iocb->ki_pos;
3259
3260         if (direct) {
3261                 ctx->direct_io = true;
3262                 ctx->iter = *from;
3263                 ctx->len = len;
3264         } else {
3265                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3266                 if (rc) {
3267                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3268                         return rc;
3269                 }
3270         }
3271
3272         /* grab a lock here; write response handlers can access ctx */
3273         mutex_lock(&ctx->aio_mutex);
3274
3275         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3276                                   cfile, cifs_sb, &ctx->list, ctx);
3277
3278         /*
3279          * If at least one write was successfully sent, then discard any rc
3280          * value from the later writes. If the remaining writes succeed,
3281          * then we'll end up returning whatever was written. If one fails,
3282          * then we'll get a new rc value from that.
3283          */
3284         if (!list_empty(&ctx->list))
3285                 rc = 0;
3286
3287         mutex_unlock(&ctx->aio_mutex);
3288
3289         if (rc) {
3290                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3291                 return rc;
3292         }
3293
3294         if (!is_sync_kiocb(iocb)) {
3295                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3296                 return -EIOCBQUEUED;
3297         }
3298
3299         rc = wait_for_completion_killable(&ctx->done);
3300         if (rc) {
3301                 mutex_lock(&ctx->aio_mutex);
3302                 ctx->rc = rc = -EINTR;
3303                 total_written = ctx->total_len;
3304                 mutex_unlock(&ctx->aio_mutex);
3305         } else {
3306                 rc = ctx->rc;
3307                 total_written = ctx->total_len;
3308         }
3309
3310         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3311
3312         if (unlikely(!total_written))
3313                 return rc;
3314
3315         iocb->ki_pos += total_written;
3316         return total_written;
3317 }
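/*
 * For a synchronous kiocb the caller blocks in
 * wait_for_completion_killable() above; for aio, __cifs_writev() returns
 * -EIOCBQUEUED and ki_complete() is fired from
 * collect_uncached_write_data() once every chunk has been accounted for.
 */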
3318
3319 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3320 {
3321         return __cifs_writev(iocb, from, true);
3322 }
3323
3324 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3325 {
3326         return __cifs_writev(iocb, from, false);
3327 }
3328
3329 static ssize_t
3330 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3331 {
3332         struct file *file = iocb->ki_filp;
3333         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3334         struct inode *inode = file->f_mapping->host;
3335         struct cifsInodeInfo *cinode = CIFS_I(inode);
3336         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3337         ssize_t rc;
3338
3339         inode_lock(inode);
3340         /*
3341          * We need to hold the sem to be sure nobody modifies the lock list
3342          * with a brlock that prevents writing.
3343          */
3344         down_read(&cinode->lock_sem);
3345
3346         rc = generic_write_checks(iocb, from);
3347         if (rc <= 0)
3348                 goto out;
3349
3350         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3351                                      server->vals->exclusive_lock_type, 0,
3352                                      NULL, CIFS_WRITE_OP))
3353                 rc = __generic_file_write_iter(iocb, from);
3354         else
3355                 rc = -EACCES;
3356 out:
3357         up_read(&cinode->lock_sem);
3358         inode_unlock(inode);
3359
3360         if (rc > 0)
3361                 rc = generic_write_sync(iocb, rc);
3362         return rc;
3363 }
3364
3365 ssize_t
3366 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3367 {
3368         struct inode *inode = file_inode(iocb->ki_filp);
3369         struct cifsInodeInfo *cinode = CIFS_I(inode);
3370         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3371         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3372                                                 iocb->ki_filp->private_data;
3373         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3374         ssize_t written;
3375
3376         written = cifs_get_writer(cinode);
3377         if (written)
3378                 return written;
3379
3380         if (CIFS_CACHE_WRITE(cinode)) {
3381                 if (cap_unix(tcon->ses) &&
3382                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3383                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3384                         written = generic_file_write_iter(iocb, from);
3385                         goto out;
3386                 }
3387                 written = cifs_writev(iocb, from);
3388                 goto out;
3389         }
3390         /*
3391          * For non-oplocked files in strict cache mode we need to write the data
3392  * to the server exactly from pos to pos+len-1 rather than flush all
3393  * affected pages, because that may cause an error with mandatory locks
3394  * on these pages but not on the region from pos to pos+len-1.
3395          */
3396         written = cifs_user_writev(iocb, from);
3397         if (CIFS_CACHE_READ(cinode)) {
3398                 /*
3399                  * We have read level caching and we have just sent a write
3400                  * request to the server thus making data in the cache stale.
3401                  * Zap the cache and set oplock/lease level to NONE to avoid
3402                  * reading stale data from the cache. All subsequent read
3403                  * operations will read new data from the server.
3404                  */
3405                 cifs_zap_mapping(inode);
3406                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3407                          inode);
3408                 cinode->oplock = 0;
3409         }
3410 out:
3411         cifs_put_writer(cinode);
3412         return written;
3413 }
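/*
 * Note: the strict read/write handlers above are what a mount with
 * strict cache semantics ends up using (the selection happens in the
 * mount code, not here); a hypothetical example:
 *
 *	mount -t cifs //server/share /mnt -o cache=strict
 */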
3414
3415 static struct cifs_readdata *
3416 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3417 {
3418         struct cifs_readdata *rdata;
3419
3420         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3421         if (rdata != NULL) {
3422                 rdata->pages = pages;
3423                 kref_init(&rdata->refcount);
3424                 INIT_LIST_HEAD(&rdata->list);
3425                 init_completion(&rdata->done);
3426                 INIT_WORK(&rdata->work, complete);
3427         }
3428
3429         return rdata;
3430 }
3431
3432 static struct cifs_readdata *
3433 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3434 {
3435         struct page **pages =
3436                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3437         struct cifs_readdata *ret = NULL;
3438
3439         if (pages) {
3440                 ret = cifs_readdata_direct_alloc(pages, complete);
3441                 if (!ret)
3442                         kfree(pages);
3443         }
3444
3445         return ret;
3446 }
3447
3448 void
3449 cifs_readdata_release(struct kref *refcount)
3450 {
3451         struct cifs_readdata *rdata = container_of(refcount,
3452                                         struct cifs_readdata, refcount);
3453 #ifdef CONFIG_CIFS_SMB_DIRECT
3454         if (rdata->mr) {
3455                 smbd_deregister_mr(rdata->mr);
3456                 rdata->mr = NULL;
3457         }
3458 #endif
3459         if (rdata->cfile)
3460                 cifsFileInfo_put(rdata->cfile);
3461
3462         kvfree(rdata->pages);
3463         kfree(rdata);
3464 }
3465
3466 static int
3467 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3468 {
3469         int rc = 0;
3470         struct page *page;
3471         unsigned int i;
3472
3473         for (i = 0; i < nr_pages; i++) {
3474                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3475                 if (!page) {
3476                         rc = -ENOMEM;
3477                         break;
3478                 }
3479                 rdata->pages[i] = page;
3480         }
3481
3482         if (rc) {
3483                 unsigned int nr_page_failed = i;
3484
3485                 for (i = 0; i < nr_page_failed; i++) {
3486                         put_page(rdata->pages[i]);
3487                         rdata->pages[i] = NULL;
3488                 }
3489         }
3490         return rc;
3491 }
3492
3493 static void
3494 cifs_uncached_readdata_release(struct kref *refcount)
3495 {
3496         struct cifs_readdata *rdata = container_of(refcount,
3497                                         struct cifs_readdata, refcount);
3498         unsigned int i;
3499
3500         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3501         for (i = 0; i < rdata->nr_pages; i++) {
3502                 put_page(rdata->pages[i]);
3503         }
3504         cifs_readdata_release(refcount);
3505 }
3506
3507 /**
3508  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3509  * @rdata:      the readdata response with list of pages holding data
3510  * @iter:       destination for our data
3511  *
3512  * This function copies data from a list of pages in a readdata response into
3513  * an array of iovecs. It will first calculate where the data should go
3514  * based on the info in the readdata and then copy the data into that spot.
3515  */
3516 static int
3517 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3518 {
3519         size_t remaining = rdata->got_bytes;
3520         unsigned int i;
3521
3522         for (i = 0; i < rdata->nr_pages; i++) {
3523                 struct page *page = rdata->pages[i];
3524                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3525                 size_t written;
3526
3527                 if (unlikely(iov_iter_is_pipe(iter))) {
3528                         void *addr = kmap_atomic(page);
3529
3530                         written = copy_to_iter(addr, copy, iter);
3531                         kunmap_atomic(addr);
3532                 } else
3533                         written = copy_page_to_iter(page, 0, copy, iter);
3534                 remaining -= written;
3535                 if (written < copy && iov_iter_count(iter) > 0)
3536                         break;
3537         }
3538         return remaining ? -EFAULT : 0;
3539 }
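/*
 * Example: a response with got_bytes = 6000 (assuming 4 KiB pages)
 * copies 4096 bytes from page 0 and 1904 from page 1. If the
 * destination iovec faults partway while iovec space still remains, the
 * loop breaks and the leftover "remaining" count turns into -EFAULT.
 */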
3540
3541 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3542
3543 static void
3544 cifs_uncached_readv_complete(struct work_struct *work)
3545 {
3546         struct cifs_readdata *rdata = container_of(work,
3547                                                 struct cifs_readdata, work);
3548
3549         complete(&rdata->done);
3550         collect_uncached_read_data(rdata->ctx);
3551         /* the call below may free the last ref to the aio ctx */
3552         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3553 }
3554
3555 static int
3556 uncached_fill_pages(struct TCP_Server_Info *server,
3557                     struct cifs_readdata *rdata, struct iov_iter *iter,
3558                     unsigned int len)
3559 {
3560         int result = 0;
3561         unsigned int i;
3562         unsigned int nr_pages = rdata->nr_pages;
3563         unsigned int page_offset = rdata->page_offset;
3564
3565         rdata->got_bytes = 0;
3566         rdata->tailsz = PAGE_SIZE;
3567         for (i = 0; i < nr_pages; i++) {
3568                 struct page *page = rdata->pages[i];
3569                 size_t n;
3570                 unsigned int segment_size = rdata->pagesz;
3571
3572                 if (i == 0)
3573                         segment_size -= page_offset;
3574                 else
3575                         page_offset = 0;
3576
3577
3578                 if (len <= 0) {
3579                         /* no need to hold page hostage */
3580                         rdata->pages[i] = NULL;
3581                         rdata->nr_pages--;
3582                         put_page(page);
3583                         continue;
3584                 }
3585
3586                 n = len;
3587                 if (len >= segment_size)
3588                         /* enough data to fill the page */
3589                         n = segment_size;
3590                 else
3591                         rdata->tailsz = len;
3592                 len -= n;
3593
3594                 if (iter)
3595                         result = copy_page_from_iter(
3596                                         page, page_offset, n, iter);
3597 #ifdef CONFIG_CIFS_SMB_DIRECT
3598                 else if (rdata->mr)
3599                         result = n;
3600 #endif
3601                 else
3602                         result = cifs_read_page_from_socket(
3603                                         server, page, page_offset, n);
3604                 if (result < 0)
3605                         break;
3606
3607                 rdata->got_bytes += result;
3608         }
3609
3610         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3611                                                 rdata->got_bytes : result;
3612 }
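/*
 * Example of the segment accounting above: with pagesz = 4096,
 * page_offset = 512 and len = 5000, page 0 receives a 3584-byte segment
 * (4096 - 512), page 1 receives the remaining 1416 bytes and tailsz is
 * set to 1416; any further pages are released as no longer needed.
 */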
3613
3614 static int
3615 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3616                               struct cifs_readdata *rdata, unsigned int len)
3617 {
3618         return uncached_fill_pages(server, rdata, NULL, len);
3619 }
3620
3621 static int
3622 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3623                               struct cifs_readdata *rdata,
3624                               struct iov_iter *iter)
3625 {
3626         return uncached_fill_pages(server, rdata, iter, iter->count);
3627 }
3628
3629 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3630                         struct list_head *rdata_list,
3631                         struct cifs_aio_ctx *ctx)
3632 {
3633         unsigned int rsize;
3634         struct cifs_credits credits;
3635         int rc;
3636         struct TCP_Server_Info *server;
3637
3638         /* XXX: should we pick a new channel here? */
3639         server = rdata->server;
3640
3641         do {
3642                 if (rdata->cfile->invalidHandle) {
3643                         rc = cifs_reopen_file(rdata->cfile, true);
3644                         if (rc == -EAGAIN)
3645                                 continue;
3646                         else if (rc)
3647                                 break;
3648                 }
3649
3650                 /*
3651                  * Wait for credits to resend this rdata.
3652                  * Note: we attempt to resend the whole rdata, not in
3653                  * smaller segments.
3654                  */
3655                 do {
3656                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3657                                                 &rsize, &credits);
3658
3659                         if (rc)
3660                                 goto fail;
3661
3662                         if (rsize < rdata->bytes) {
3663                                 add_credits_and_wake_if(server, &credits, 0);
3664                                 msleep(1000);
3665                         }
3666                 } while (rsize < rdata->bytes);
3667                 rdata->credits = credits;
3668
3669                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3670                 if (!rc) {
3671                         if (rdata->cfile->invalidHandle)
3672                                 rc = -EAGAIN;
3673                         else {
3674 #ifdef CONFIG_CIFS_SMB_DIRECT
3675                                 if (rdata->mr) {
3676                                         rdata->mr->need_invalidate = true;
3677                                         smbd_deregister_mr(rdata->mr);
3678                                         rdata->mr = NULL;
3679                                 }
3680 #endif
3681                                 rc = server->ops->async_readv(rdata);
3682                         }
3683                 }
3684
3685                 /* If the read was successfully sent, we are done */
3686                 if (!rc) {
3687                         /* Add to aio pending list */
3688                         list_add_tail(&rdata->list, rdata_list);
3689                         return 0;
3690                 }
3691
3692                 /* Roll back credits and retry if needed */
3693                 add_credits_and_wake_if(server, &rdata->credits, 0);
3694         } while (rc == -EAGAIN);
3695
3696 fail:
3697         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3698         return rc;
3699 }
3700
3701 static int
3702 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3703                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3704                      struct cifs_aio_ctx *ctx)
3705 {
3706         struct cifs_readdata *rdata;
3707         unsigned int npages, rsize;
3708         struct cifs_credits credits_on_stack;
3709         struct cifs_credits *credits = &credits_on_stack;
3710         size_t cur_len;
3711         int rc;
3712         pid_t pid;
3713         struct TCP_Server_Info *server;
3714         struct page **pagevec;
3715         size_t start;
3716         struct iov_iter direct_iov = ctx->iter;
3717
3718         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3719
3720         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3721                 pid = open_file->pid;
3722         else
3723                 pid = current->tgid;
3724
3725         if (ctx->direct_io)
3726                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3727
3728         do {
3729                 if (open_file->invalidHandle) {
3730                         rc = cifs_reopen_file(open_file, true);
3731                         if (rc == -EAGAIN)
3732                                 continue;
3733                         else if (rc)
3734                                 break;
3735                 }
3736
3737                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3738                                                    &rsize, credits);
3739                 if (rc)
3740                         break;
3741
3742                 cur_len = min_t(const size_t, len, rsize);
3743
3744                 if (ctx->direct_io) {
3745                         ssize_t result;
3746
3747                         result = iov_iter_get_pages_alloc(
3748                                         &direct_iov, &pagevec,
3749                                         cur_len, &start);
3750                         if (result < 0) {
3751                                 cifs_dbg(VFS,
3752                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3753                                          result, iov_iter_type(&direct_iov),
3754                                          direct_iov.iov_offset,
3755                                          direct_iov.count);
3756                                 dump_stack();
3757
3758                                 rc = result;
3759                                 add_credits_and_wake_if(server, credits, 0);
3760                                 break;
3761                         }
3762                         cur_len = (size_t)result;
3763                         iov_iter_advance(&direct_iov, cur_len);
3764
3765                         rdata = cifs_readdata_direct_alloc(
3766                                         pagevec, cifs_uncached_readv_complete);
3767                         if (!rdata) {
3768                                 add_credits_and_wake_if(server, credits, 0);
3769                                 rc = -ENOMEM;
3770                                 break;
3771                         }
3772
3773                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3774                         rdata->page_offset = start;
3775                         rdata->tailsz = npages > 1 ?
3776                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3777                                 cur_len;
3778
3779                 } else {
3780
3781                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3782                         /* allocate a readdata struct */
3783                         rdata = cifs_readdata_alloc(npages,
3784                                             cifs_uncached_readv_complete);
3785                         if (!rdata) {
3786                                 add_credits_and_wake_if(server, credits, 0);
3787                                 rc = -ENOMEM;
3788                                 break;
3789                         }
3790
3791                         rc = cifs_read_allocate_pages(rdata, npages);
3792                         if (rc) {
3793                                 kvfree(rdata->pages);
3794                                 kfree(rdata);
3795                                 add_credits_and_wake_if(server, credits, 0);
3796                                 break;
3797                         }
3798
3799                         rdata->tailsz = PAGE_SIZE;
3800                 }
3801
3802                 rdata->server = server;
3803                 rdata->cfile = cifsFileInfo_get(open_file);
3804                 rdata->nr_pages = npages;
3805                 rdata->offset = offset;
3806                 rdata->bytes = cur_len;
3807                 rdata->pid = pid;
3808                 rdata->pagesz = PAGE_SIZE;
3809                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3810                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3811                 rdata->credits = credits_on_stack;
3812                 rdata->ctx = ctx;
3813                 kref_get(&ctx->refcount);
3814
3815                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3816
3817                 if (!rc) {
3818                         if (rdata->cfile->invalidHandle)
3819                                 rc = -EAGAIN;
3820                         else
3821                                 rc = server->ops->async_readv(rdata);
3822                 }
3823
3824                 if (rc) {
3825                         add_credits_and_wake_if(server, &rdata->credits, 0);
3826                         kref_put(&rdata->refcount,
3827                                 cifs_uncached_readdata_release);
3828                         if (rc == -EAGAIN) {
3829                                 iov_iter_revert(&direct_iov, cur_len);
3830                                 continue;
3831                         }
3832                         break;
3833                 }
3834
3835                 list_add_tail(&rdata->list, rdata_list);
3836                 offset += cur_len;
3837                 len -= cur_len;
3838         } while (len > 0);
3839
3840         return rc;
3841 }
3842
3843 static void
3844 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3845 {
3846         struct cifs_readdata *rdata, *tmp;
3847         struct iov_iter *to = &ctx->iter;
3848         struct cifs_sb_info *cifs_sb;
3849         int rc;
3850
3851         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3852
3853         mutex_lock(&ctx->aio_mutex);
3854
3855         if (list_empty(&ctx->list)) {
3856                 mutex_unlock(&ctx->aio_mutex);
3857                 return;
3858         }
3859
3860         rc = ctx->rc;
3861         /* the loop below should proceed in the order of increasing offsets */
3862 again:
3863         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3864                 if (!rc) {
3865                         if (!try_wait_for_completion(&rdata->done)) {
3866                                 mutex_unlock(&ctx->aio_mutex);
3867                                 return;
3868                         }
3869
3870                         if (rdata->result == -EAGAIN) {
3871                                 /* resend call if it's a retryable error */
3872                                 struct list_head tmp_list;
3873                                 unsigned int got_bytes = rdata->got_bytes;
3874
3875                                 list_del_init(&rdata->list);
3876                                 INIT_LIST_HEAD(&tmp_list);
3877
3878                                 /*
3879                                  * We got part of the data and then a reconnect
3880                                  * happened -- fill the buffer and continue
3881                                  * reading.
3882                                  */
3883                                 if (got_bytes && got_bytes < rdata->bytes) {
3884                                         rc = 0;
3885                                         if (!ctx->direct_io)
3886                                                 rc = cifs_readdata_to_iov(rdata, to);
3887                                         if (rc) {
3888                                                 kref_put(&rdata->refcount,
3889                                                         cifs_uncached_readdata_release);
3890                                                 continue;
3891                                         }
3892                                 }
3893
3894                                 if (ctx->direct_io) {
3895                                         /*
3896                                          * Reuse the rdata, as this is
3897                                          * direct I/O.
3898                                          */
3899                                         rc = cifs_resend_rdata(
3900                                                 rdata,
3901                                                 &tmp_list, ctx);
3902                                 } else {
3903                                         rc = cifs_send_async_read(
3904                                                 rdata->offset + got_bytes,
3905                                                 rdata->bytes - got_bytes,
3906                                                 rdata->cfile, cifs_sb,
3907                                                 &tmp_list, ctx);
3908
3909                                         kref_put(&rdata->refcount,
3910                                                 cifs_uncached_readdata_release);
3911                                 }
3912
3913                                 list_splice(&tmp_list, &ctx->list);
3914
3915                                 goto again;
3916                         } else if (rdata->result)
3917                                 rc = rdata->result;
3918                         else if (!ctx->direct_io)
3919                                 rc = cifs_readdata_to_iov(rdata, to);
3920
3921                         /* if there was a short read -- discard anything left */
3922                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3923                                 rc = -ENODATA;
3924
3925                         ctx->total_len += rdata->got_bytes;
3926                 }
3927                 list_del_init(&rdata->list);
3928                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3929         }
3930
3931         if (!ctx->direct_io)
3932                 ctx->total_len = ctx->len - iov_iter_count(to);
3933
3934         /* mask nodata case */
3935         if (rc == -ENODATA)
3936                 rc = 0;
3937
3938         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3939
3940         mutex_unlock(&ctx->aio_mutex);
3941
3942         if (ctx->iocb && ctx->iocb->ki_complete)
3943                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3944         else
3945                 complete(&ctx->done);
3946 }
3947
3948 static ssize_t __cifs_readv(
3949         struct kiocb *iocb, struct iov_iter *to, bool direct)
3950 {
3951         size_t len;
3952         struct file *file = iocb->ki_filp;
3953         struct cifs_sb_info *cifs_sb;
3954         struct cifsFileInfo *cfile;
3955         struct cifs_tcon *tcon;
3956         ssize_t rc, total_read = 0;
3957         loff_t offset = iocb->ki_pos;
3958         struct cifs_aio_ctx *ctx;
3959
3960         /*
3961          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3962          * so fall back to the data-copy read path.
3963          * This could be improved by getting pages directly from the ITER_KVEC.
3964          */
3965         if (direct && iov_iter_is_kvec(to)) {
3966                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3967                 direct = false;
3968         }
3969
3970         len = iov_iter_count(to);
3971         if (!len)
3972                 return 0;
3973
3974         cifs_sb = CIFS_FILE_SB(file);
3975         cfile = file->private_data;
3976         tcon = tlink_tcon(cfile->tlink);
3977
3978         if (!tcon->ses->server->ops->async_readv)
3979                 return -ENOSYS;
3980
3981         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3982                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3983
3984         ctx = cifs_aio_ctx_alloc();
3985         if (!ctx)
3986                 return -ENOMEM;
3987
3988         ctx->cfile = cifsFileInfo_get(cfile);
3989
3990         if (!is_sync_kiocb(iocb))
3991                 ctx->iocb = iocb;
3992
3993         if (iter_is_iovec(to))
3994                 ctx->should_dirty = true;
3995
3996         if (direct) {
3997                 ctx->pos = offset;
3998                 ctx->direct_io = true;
3999                 ctx->iter = *to;
4000                 ctx->len = len;
4001         } else {
4002                 rc = setup_aio_ctx_iter(ctx, to, READ);
4003                 if (rc) {
4004                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4005                         return rc;
4006                 }
4007                 len = ctx->len;
4008         }
4009
4010         /* grab a lock here; read response handlers can access ctx */
4011         mutex_lock(&ctx->aio_mutex);
4012
4013         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4014
4015         /* if sending at least one read request succeeded, then reset rc */
4016         if (!list_empty(&ctx->list))
4017                 rc = 0;
4018
4019         mutex_unlock(&ctx->aio_mutex);
4020
4021         if (rc) {
4022                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4023                 return rc;
4024         }
4025
4026         if (!is_sync_kiocb(iocb)) {
4027                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4028                 return -EIOCBQUEUED;
4029         }
4030
4031         rc = wait_for_completion_killable(&ctx->done);
4032         if (rc) {
4033                 mutex_lock(&ctx->aio_mutex);
4034                 ctx->rc = rc = -EINTR;
4035                 total_read = ctx->total_len;
4036                 mutex_unlock(&ctx->aio_mutex);
4037         } else {
4038                 rc = ctx->rc;
4039                 total_read = ctx->total_len;
4040         }
4041
4042         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4043
4044         if (total_read) {
4045                 iocb->ki_pos += total_read;
4046                 return total_read;
4047         }
4048         return rc;
4049 }
4050
4051 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4052 {
4053         return __cifs_readv(iocb, to, true);
4054 }
4055
4056 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4057 {
4058         return __cifs_readv(iocb, to, false);
4059 }
4060
4061 ssize_t
4062 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4063 {
4064         struct inode *inode = file_inode(iocb->ki_filp);
4065         struct cifsInodeInfo *cinode = CIFS_I(inode);
4066         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4067         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4068                                                 iocb->ki_filp->private_data;
4069         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4070         int rc = -EACCES;
4071
4072         /*
4073          * In strict cache mode we need to read from the server all the time
4074          * if we don't have level II oplock because the server can delay mtime
4075          * change - so we can't make a decision about invalidating the inode.
4076          * We can also fail page reads if there are mandatory locks on pages
4077          * affected by this read but not on the region from pos to
4078          * pos+len-1.
4079          */
4080         if (!CIFS_CACHE_READ(cinode))
4081                 return cifs_user_readv(iocb, to);
4082
4083         if (cap_unix(tcon->ses) &&
4084             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4085             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4086                 return generic_file_read_iter(iocb, to);
4087
4088         /*
4089          * We need to hold the sem to be sure nobody modifies the lock list
4090          * with a brlock that prevents reading.
4091          */
4092         down_read(&cinode->lock_sem);
4093         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4094                                      tcon->ses->server->vals->shared_lock_type,
4095                                      0, NULL, CIFS_READ_OP))
4096                 rc = generic_file_read_iter(iocb, to);
4097         up_read(&cinode->lock_sem);
4098         return rc;
4099 }
4100
4101 static ssize_t
4102 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4103 {
4104         int rc = -EACCES;
4105         unsigned int bytes_read = 0;
4106         unsigned int total_read;
4107         unsigned int current_read_size;
4108         unsigned int rsize;
4109         struct cifs_sb_info *cifs_sb;
4110         struct cifs_tcon *tcon;
4111         struct TCP_Server_Info *server;
4112         unsigned int xid;
4113         char *cur_offset;
4114         struct cifsFileInfo *open_file;
4115         struct cifs_io_parms io_parms = {0};
4116         int buf_type = CIFS_NO_BUFFER;
4117         __u32 pid;
4118
4119         xid = get_xid();
4120         cifs_sb = CIFS_FILE_SB(file);
4121
4122         /* FIXME: set up handlers for larger reads and/or convert to async */
4123         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4124
4125         if (file->private_data == NULL) {
4126                 rc = -EBADF;
4127                 free_xid(xid);
4128                 return rc;
4129         }
4130         open_file = file->private_data;
4131         tcon = tlink_tcon(open_file->tlink);
4132         server = cifs_pick_channel(tcon->ses);
4133
4134         if (!server->ops->sync_read) {
4135                 free_xid(xid);
4136                 return -ENOSYS;
4137         }
4138
4139         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4140                 pid = open_file->pid;
4141         else
4142                 pid = current->tgid;
4143
4144         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4145                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4146
4147         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4148              total_read += bytes_read, cur_offset += bytes_read) {
4149                 do {
4150                         current_read_size = min_t(uint, read_size - total_read,
4151                                                   rsize);
4152                         /*
4153                          * For Windows ME and 9x we do not want to request
4154                          * more than was negotiated, since the server will
4155                          * refuse the read otherwise.
4156                          */
4157                         if (!(tcon->ses->capabilities &
4158                                 tcon->ses->server->vals->cap_large_files)) {
4159                                 current_read_size = min_t(uint,
4160                                         current_read_size, CIFSMaxBufSize);
4161                         }
4162                         if (open_file->invalidHandle) {
4163                                 rc = cifs_reopen_file(open_file, true);
4164                                 if (rc != 0)
4165                                         break;
4166                         }
4167                         io_parms.pid = pid;
4168                         io_parms.tcon = tcon;
4169                         io_parms.offset = *offset;
4170                         io_parms.length = current_read_size;
4171                         io_parms.server = server;
4172                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4173                                                     &bytes_read, &cur_offset,
4174                                                     &buf_type);
4175                 } while (rc == -EAGAIN);
4176
4177                 if (rc || (bytes_read == 0)) {
4178                         if (total_read) {
4179                                 break;
4180                         } else {
4181                                 free_xid(xid);
4182                                 return rc;
4183                         }
4184                 } else {
4185                         cifs_stats_bytes_read(tcon, total_read);
4186                         *offset += bytes_read;
4187                 }
4188         }
4189         free_xid(xid);
4190         return total_read;
4191 }
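
/*
 * A minimal sketch of the chunked-read pattern that cifs_read() above
 * implements, with the transport abstracted away. do_wire_read() is a
 * hypothetical stand-in for server->ops->sync_read(); the shape of the
 * loop, not the helper, is the point.
 */
static ssize_t chunked_read_sketch(char *buf, size_t want, loff_t *pos,
				   size_t rsize)
{
	size_t total = 0;

	while (total < want) {
		size_t n = min(want - total, rsize); /* cap at negotiated rsize */
		ssize_t got;

		do {
			got = do_wire_read(buf + total, n, *pos);
		} while (got == -EAGAIN);	/* transient failure: retry chunk */

		if (got <= 0)			/* hard error or EOF */
			return total ? total : got;

		total += got;
		*pos += got;
	}
	return total;
}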
4192
4193 /*
4194  * If the page is mmap'ed into a process' page tables, then we need to make
4195  * sure that it doesn't change while being written back.
4196  */
4197 static vm_fault_t
4198 cifs_page_mkwrite(struct vm_fault *vmf)
4199 {
4200         struct page *page = vmf->page;
4201         struct file *file = vmf->vma->vm_file;
4202         struct inode *inode = file_inode(file);
4203
4204         cifs_fscache_wait_on_page_write(inode, page);
4205
4206         lock_page(page);
4207         return VM_FAULT_LOCKED;
4208 }
4209
4210 static const struct vm_operations_struct cifs_file_vm_ops = {
4211         .fault = filemap_fault,
4212         .map_pages = filemap_map_pages,
4213         .page_mkwrite = cifs_page_mkwrite,
4214 };
4215
4216 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4217 {
4218         int xid, rc = 0;
4219         struct inode *inode = file_inode(file);
4220
4221         xid = get_xid();
4222
4223         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4224                 rc = cifs_zap_mapping(inode);
4225         if (!rc)
4226                 rc = generic_file_mmap(file, vma);
4227         if (!rc)
4228                 vma->vm_ops = &cifs_file_vm_ops;
4229
4230         free_xid(xid);
4231         return rc;
4232 }
4233
4234 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4235 {
4236         int rc, xid;
4237
4238         xid = get_xid();
4239
4240         rc = cifs_revalidate_file(file);
4241         if (rc)
4242                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4243                          rc);
4244         if (!rc)
4245                 rc = generic_file_mmap(file, vma);
4246         if (!rc)
4247                 vma->vm_ops = &cifs_file_vm_ops;
4248
4249         free_xid(xid);
4250         return rc;
4251 }
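
/*
 * Note: cifs_file_strict_mmap() (strict cache mode) zaps any possibly
 * stale page cache before mapping, while cifs_file_mmap() only
 * revalidates the file; both install cifs_file_vm_ops so that writable
 * faults go through cifs_page_mkwrite() above.
 */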
4252
4253 static void
4254 cifs_readv_complete(struct work_struct *work)
4255 {
4256         unsigned int i, got_bytes;
4257         struct cifs_readdata *rdata = container_of(work,
4258                                                 struct cifs_readdata, work);
4259
4260         got_bytes = rdata->got_bytes;
4261         for (i = 0; i < rdata->nr_pages; i++) {
4262                 struct page *page = rdata->pages[i];
4263
4264                 lru_cache_add(page);
4265
4266                 if (rdata->result == 0 ||
4267                     (rdata->result == -EAGAIN && got_bytes)) {
4268                         flush_dcache_page(page);
4269                         SetPageUptodate(page);
4270                 } else
4271                         SetPageError(page);
4272
4273                 unlock_page(page);
4274
4275                 if (rdata->result == 0 ||
4276                     (rdata->result == -EAGAIN && got_bytes))
4277                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4278                 else
4279                         cifs_fscache_uncache_page(rdata->mapping->host, page);
4280
4281                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
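                /*
                 * e.g. got_bytes = 5000 with 4096-byte pages: it drops to
                 * 904 after page 0 and to 0 after page 1, so in the -EAGAIN
                 * case above only those first two pages are treated as valid.
                 */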
4282
4283                 put_page(page);
4284                 rdata->pages[i] = NULL;
4285         }
4286         kref_put(&rdata->refcount, cifs_readdata_release);
4287 }
4288
4289 static int
4290 readpages_fill_pages(struct TCP_Server_Info *server,
4291                      struct cifs_readdata *rdata, struct iov_iter *iter,
4292                      unsigned int len)
4293 {
4294         int result = 0;
4295         unsigned int i;
4296         u64 eof;
4297         pgoff_t eof_index;
4298         unsigned int nr_pages = rdata->nr_pages;
4299         unsigned int page_offset = rdata->page_offset;
4300
4301         /* determine the eof that the server (probably) has */
4302         eof = CIFS_I(rdata->mapping->host)->server_eof;
4303         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4304         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
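        /*
         * e.g. with 4096-byte pages, eof values 1..4096 all give eof_index 0
         * and eof 4097 gives eof_index 1; eof == 0 is special-cased to 0.
         */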
4305
4306         rdata->got_bytes = 0;
4307         rdata->tailsz = PAGE_SIZE;
4308         for (i = 0; i < nr_pages; i++) {
4309                 struct page *page = rdata->pages[i];
4310                 unsigned int to_read = rdata->pagesz;
4311                 size_t n;
4312
4313                 if (i == 0)
4314                         to_read -= page_offset;
4315                 else
4316                         page_offset = 0;
4317
4318                 n = to_read;
4319
4320                 if (len >= to_read) {
4321                         len -= to_read;
4322                 } else if (len > 0) {
4323                         /* enough for partial page, fill and zero the rest */
4324                         zero_user(page, len + page_offset, to_read - len);
4325                         n = rdata->tailsz = len;
4326                         len = 0;
4327                 } else if (page->index > eof_index) {
4328                         /*
4329                          * The VFS will not try to do readahead past the
4330                          * i_size, but it's possible that we have outstanding
4331                          * writes with gaps in the middle and the i_size hasn't
4332                          * caught up yet. Populate those with zeroed out pages
4333                          * to prevent the VFS from repeatedly attempting to
4334                          * fill them until the writes are flushed.
4335                          */
4336                         zero_user(page, 0, PAGE_SIZE);
4337                         lru_cache_add(page);
4338                         flush_dcache_page(page);
4339                         SetPageUptodate(page);
4340                         unlock_page(page);
4341                         put_page(page);
4342                         rdata->pages[i] = NULL;
4343                         rdata->nr_pages--;
4344                         continue;
4345                 } else {
4346                         /* no need to hold page hostage */
4347                         lru_cache_add(page);
4348                         unlock_page(page);
4349                         put_page(page);
4350                         rdata->pages[i] = NULL;
4351                         rdata->nr_pages--;
4352                         continue;
4353                 }
4354
4355                 if (iter)
4356                         result = copy_page_from_iter(
4357                                         page, page_offset, n, iter);
4358 #ifdef CONFIG_CIFS_SMB_DIRECT
4359                 else if (rdata->mr)
4360                         result = n;
4361 #endif
4362                 else
4363                         result = cifs_read_page_from_socket(
4364                                         server, page, page_offset, n);
4365                 if (result < 0)
4366                         break;
4367
4368                 rdata->got_bytes += result;
4369         }
4370
4371         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4372                                                 rdata->got_bytes : result;
4373 }
4374
4375 static int
4376 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4377                                struct cifs_readdata *rdata, unsigned int len)
4378 {
4379         return readpages_fill_pages(server, rdata, NULL, len);
4380 }
4381
4382 static int
4383 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4384                                struct cifs_readdata *rdata,
4385                                struct iov_iter *iter)
4386 {
4387         return readpages_fill_pages(server, rdata, iter, iter->count);
4388 }
4389
4390 static int
4391 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4392                     unsigned int rsize, struct list_head *tmplist,
4393                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4394 {
4395         struct page *page, *tpage;
4396         unsigned int expected_index;
4397         int rc;
4398         gfp_t gfp = readahead_gfp_mask(mapping);
4399
4400         INIT_LIST_HEAD(tmplist);
4401
4402         page = lru_to_page(page_list);
4403
4404         /*
4405          * Lock the page and put it in the cache. Since no one else
4406          * should have access to this page, we're safe to simply set
4407          * PG_locked without checking it first.
4408          */
4409         __SetPageLocked(page);
4410         rc = add_to_page_cache_locked(page, mapping,
4411                                       page->index, gfp);
4412
4413         /* give up if we can't stick it in the cache */
4414         if (rc) {
4415                 __ClearPageLocked(page);
4416                 return rc;
4417         }
4418
4419         /* move first page to the tmplist */
4420         *offset = (loff_t)page->index << PAGE_SHIFT;
4421         *bytes = PAGE_SIZE;
4422         *nr_pages = 1;
4423         list_move_tail(&page->lru, tmplist);
4424
4425         /* now try to add more pages onto the request */
4426         expected_index = page->index + 1;
4427         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4428                 /* discontinuity? */
4429                 if (page->index != expected_index)
4430                         break;
4431
4432                 /* would this page push the read over the rsize? */
4433                 if (*bytes + PAGE_SIZE > rsize)
4434                         break;
4435
4436                 __SetPageLocked(page);
4437                 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4438                 if (rc) {
4439                         __ClearPageLocked(page);
4440                         break;
4441                 }
4442                 list_move_tail(&page->lru, tmplist);
4443                 (*bytes) += PAGE_SIZE;
4444                 expected_index++;
4445                 (*nr_pages)++;
4446         }
4447         return rc;
4448 }
4449
4450 static int cifs_readpages(struct file *file, struct address_space *mapping,
4451         struct list_head *page_list, unsigned num_pages)
4452 {
4453         int rc;
4454         int err = 0;
4455         struct list_head tmplist;
4456         struct cifsFileInfo *open_file = file->private_data;
4457         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4458         struct TCP_Server_Info *server;
4459         pid_t pid;
4460         unsigned int xid;
4461
4462         xid = get_xid();
4463         /*
4464          * Reads as many pages as possible from fscache. Returns -ENOBUFS
4465          * immediately if the cookie is negative.
4466          *
4467          * After this point, every page in the list might have PG_fscache set,
4468          * so we will need to clear that bit on every page we don't use.
4469          */
4470         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4471                                          &num_pages);
4472         if (rc == 0) {
4473                 free_xid(xid);
4474                 return rc;
4475         }
4476
4477         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4478                 pid = open_file->pid;
4479         else
4480                 pid = current->tgid;
4481
4482         rc = 0;
4483         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4484
4485         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4486                  __func__, file, mapping, num_pages);
4487
4488         /*
4489          * Start with the page at end of list and move it to private
4490          * list. Do the same with any following pages until we hit
4491          * the rsize limit, hit an index discontinuity, or run out of
4492          * pages. Issue the async read and then start the loop again
4493          * until the list is empty.
4494          *
4495          * Note that list order is important. The page_list is in
4496          * the order of declining indexes. When we put the pages in
4497          * the rdata->pages, then we want them in increasing order.
4498          */
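        /*
         * For example, pages with indexes 4..7 sit on page_list as 7,6,5,4;
         * lru_to_page() takes the tail (index 4) first, so tmplist and then
         * rdata->pages[] end up ordered 4,5,6,7.
         */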
4499         while (!list_empty(page_list) && !err) {
4500                 unsigned int i, nr_pages, bytes, rsize;
4501                 loff_t offset;
4502                 struct page *page, *tpage;
4503                 struct cifs_readdata *rdata;
4504                 struct cifs_credits credits_on_stack;
4505                 struct cifs_credits *credits = &credits_on_stack;
4506
4507                 if (open_file->invalidHandle) {
4508                         rc = cifs_reopen_file(open_file, true);
4509                         if (rc == -EAGAIN)
4510                                 continue;
4511                         else if (rc)
4512                                 break;
4513                 }
4514
4515                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4516                                                    &rsize, credits);
4517                 if (rc)
4518                         break;
4519
4520                 /*
4521                  * Give up immediately if rsize is too small to read an entire
4522                  * page. The VFS will fall back to readpage. We should never
4523                  * reach this point, however, since we set ra_pages to 0 when
4524                  * the rsize is smaller than a cache page.
4525                  */
4526                 if (unlikely(rsize < PAGE_SIZE)) {
4527                         add_credits_and_wake_if(server, credits, 0);
4528                         free_xid(xid);
4529                         return 0;
4530                 }
4531
4532                 nr_pages = 0;
4533                 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4534                                          &nr_pages, &offset, &bytes);
4535                 if (!nr_pages) {
4536                         add_credits_and_wake_if(server, credits, 0);
4537                         break;
4538                 }
4539
4540                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4541                 if (!rdata) {
4542                         /* best to give up if we're out of mem */
4543                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4544                                 list_del(&page->lru);
4545                                 lru_cache_add(page);
4546                                 unlock_page(page);
4547                                 put_page(page);
4548                         }
4549                         rc = -ENOMEM;
4550                         add_credits_and_wake_if(server, credits, 0);
4551                         break;
4552                 }
4553
4554                 rdata->cfile = cifsFileInfo_get(open_file);
4555                 rdata->server = server;
4556                 rdata->mapping = mapping;
4557                 rdata->offset = offset;
4558                 rdata->bytes = bytes;
4559                 rdata->pid = pid;
4560                 rdata->pagesz = PAGE_SIZE;
4561                 rdata->tailsz = PAGE_SIZE;
4562                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4563                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4564                 rdata->credits = credits_on_stack;
4565
4566                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4567                         list_del(&page->lru);
4568                         rdata->pages[rdata->nr_pages++] = page;
4569                 }
4570
4571                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4572
4573                 if (!rc) {
4574                         if (rdata->cfile->invalidHandle)
4575                                 rc = -EAGAIN;
4576                         else
4577                                 rc = server->ops->async_readv(rdata);
4578                 }
4579
4580                 if (rc) {
4581                         add_credits_and_wake_if(server, &rdata->credits, 0);
4582                         for (i = 0; i < rdata->nr_pages; i++) {
4583                                 page = rdata->pages[i];
4584                                 lru_cache_add(page);
4585                                 unlock_page(page);
4586                                 put_page(page);
4587                         }
4588                         /* Fall back to readpage in error/reconnect cases */
4589                         kref_put(&rdata->refcount, cifs_readdata_release);
4590                         break;
4591                 }
4592
4593                 kref_put(&rdata->refcount, cifs_readdata_release);
4594         }
4595
4596         /* Any pages that have been shown to fscache but didn't get added to
4597          * the pagecache must be uncached before they get returned to the
4598          * allocator.
4599          */
4600         cifs_fscache_readpages_cancel(mapping->host, page_list);
4601         free_xid(xid);
4602         return rc;
4603 }
4604
4605 /*
4606  * cifs_readpage_worker must be called with the page pinned
4607  */
4608 static int cifs_readpage_worker(struct file *file, struct page *page,
4609         loff_t *poffset)
4610 {
4611         char *read_data;
4612         int rc;
4613
4614         /* Is the page cached? */
4615         rc = cifs_readpage_from_fscache(file_inode(file), page);
4616         if (rc == 0)
4617                 goto read_complete;
4618
4619         read_data = kmap(page);
4620         /* for reads over a certain size we could initiate async read ahead */
4621
4622         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4623
4624         if (rc < 0)
4625                 goto io_error;
4626         else
4627                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4628
4629         /* we do not want atime to be less than mtime; that broke some apps */
4630         file_inode(file)->i_atime = current_time(file_inode(file));
4631         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4632                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4633         else
4634                 file_inode(file)->i_atime = current_time(file_inode(file));
4635
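        /* a short read leaves a tail; zero it so the page has no stale data */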
4636         if (PAGE_SIZE > rc)
4637                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4638
4639         flush_dcache_page(page);
4640         SetPageUptodate(page);
4641
4642         /* send this page to the cache */
4643         cifs_readpage_to_fscache(file_inode(file), page);
4644
4645         rc = 0;
4646
4647 io_error:
4648         kunmap(page);
4649         unlock_page(page);
4650
4651 read_complete:
4652         return rc;
4653 }
4654
4655 static int cifs_readpage(struct file *file, struct page *page)
4656 {
4657         loff_t offset = page_file_offset(page);
4658         int rc = -EACCES;
4659         unsigned int xid;
4660
4661         xid = get_xid();
4662
4663         if (file->private_data == NULL) {
4664                 rc = -EBADF;
4665                 free_xid(xid);
4666                 return rc;
4667         }
4668
4669         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4670                  page, (int)offset, (int)offset);
4671
4672         rc = cifs_readpage_worker(file, page, &offset);
4673
4674         free_xid(xid);
4675         return rc;
4676 }
4677
4678 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4679 {
4680         struct cifsFileInfo *open_file;
4681
4682         spin_lock(&cifs_inode->open_file_lock);
4683         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4684                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4685                         spin_unlock(&cifs_inode->open_file_lock);
4686                         return 1;
4687                 }
4688         }
4689         spin_unlock(&cifs_inode->open_file_lock);
4690         return 0;
4691 }
4692
4693 /* We do not want to update the file size from the server for inodes
4694    open for write, to avoid races with writepage extending the file.
4695    In the future we could consider allowing the inode to be refreshed
4696    only on increases in the file size, but this is tricky to do without
4697    racing with writebehind page caching in the current Linux kernel
4698    design. */
4699 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4700 {
4701         if (!cifsInode)
4702                 return true;
4703
4704         if (is_inode_writable(cifsInode)) {
4705                 /* This inode is open for write at least once */
4706                 struct cifs_sb_info *cifs_sb;
4707
4708                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4709                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4710                         /* since there is no page cache to corrupt on
4711                            direct I/O, we can change the size safely */
4712                         return true;
4713                 }
4714
4715                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4716                         return true;
4717
4718                 return false;
4719         } else
4720                 return true;
4721 }
4722
4723 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4724                         loff_t pos, unsigned len, unsigned flags,
4725                         struct page **pagep, void **fsdata)
4726 {
4727         int oncethru = 0;
4728         pgoff_t index = pos >> PAGE_SHIFT;
4729         loff_t offset = pos & (PAGE_SIZE - 1);
4730         loff_t page_start = pos & PAGE_MASK;
4731         loff_t i_size;
4732         struct page *page;
4733         int rc = 0;
4734
4735         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4736
4737 start:
4738         page = grab_cache_page_write_begin(mapping, index, flags);
4739         if (!page) {
4740                 rc = -ENOMEM;
4741                 goto out;
4742         }
4743
4744         if (PageUptodate(page))
4745                 goto out;
4746
4747         /*
4748          * If we write a full page it will be up to date, no need to read from
4749          * the server. If the write is short, we'll end up doing a sync write
4750          * instead.
4751          */
4752         if (len == PAGE_SIZE)
4753                 goto out;
4754
4755         /*
4756          * optimize away the read when we have an oplock, and we're not
4757          * expecting to use any of the data we'd be reading in. That
4758          * is, when the page lies beyond the EOF, or straddles the EOF
4759          * and the write will cover all of the existing data.
4760          */
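        /*
         * e.g. a 512-byte write at offset 0 into a page wholly beyond EOF:
         * page_start >= i_size, so bytes 512..PAGE_SIZE-1 are zeroed below
         * and no read from the server is needed before the copy.
         */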
4761         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4762                 i_size = i_size_read(mapping->host);
4763                 if (page_start >= i_size ||
4764                     (offset == 0 && (pos + len) >= i_size)) {
4765                         zero_user_segments(page, 0, offset,
4766                                            offset + len,
4767                                            PAGE_SIZE);
4768                         /*
4769                          * PageChecked means that the parts of the page
4770                          * to which we're not writing are considered up
4771                          * to date. Once the data is copied to the
4772                          * page, it can be set uptodate.
4773                          */
4774                         SetPageChecked(page);
4775                         goto out;
4776                 }
4777         }
4778
4779         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4780                 /*
4781                  * might as well read a page; it is fast enough. If we get
4782                  * an error, we don't need to return it. cifs_write_end will
4783                  * do a sync write instead since PG_uptodate isn't set.
4784                  */
4785                 cifs_readpage_worker(file, page, &page_start);
4786                 put_page(page);
4787                 oncethru = 1;
4788                 goto start;
4789         } else {
4790                 /* we could try using another file handle if there is one,
4791                    but how would we lock it to prevent a close of that handle
4792                    racing with this read? In any case this will be written
4793                    out by write_end, so it is fine as is */
4794         }
4795 out:
4796         *pagep = page;
4797         return rc;
4798 }
4799
4800 static int cifs_release_page(struct page *page, gfp_t gfp)
4801 {
4802         if (PagePrivate(page))
4803                 return 0;
4804
4805         return cifs_fscache_release_page(page, gfp);
4806 }
4807
4808 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4809                                  unsigned int length)
4810 {
4811         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4812
4813         if (offset == 0 && length == PAGE_SIZE)
4814                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4815 }
4816
4817 static int cifs_launder_page(struct page *page)
4818 {
4819         int rc = 0;
4820         loff_t range_start = page_offset(page);
4821         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4822         struct writeback_control wbc = {
4823                 .sync_mode = WB_SYNC_ALL,
4824                 .nr_to_write = 0,
4825                 .range_start = range_start,
4826                 .range_end = range_end,
4827         };
4828
4829         cifs_dbg(FYI, "Launder page: %p\n", page);
4830
4831         if (clear_page_dirty_for_io(page))
4832                 rc = cifs_writepage_locked(page, &wbc);
4833
4834         cifs_fscache_invalidate_page(page, page->mapping->host);
4835         return rc;
4836 }
4837
4838 void cifs_oplock_break(struct work_struct *work)
4839 {
4840         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4841                                                   oplock_break);
4842         struct inode *inode = d_inode(cfile->dentry);
4843         struct cifsInodeInfo *cinode = CIFS_I(inode);
4844         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4845         struct TCP_Server_Info *server = tcon->ses->server;
4846         int rc = 0;
4847         bool purge_cache = false;
4848         bool is_deferred = false;
4849         struct cifs_deferred_close *dclose;
4850
4851         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4852                         TASK_UNINTERRUPTIBLE);
4853
4854         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4855                                       cfile->oplock_epoch, &purge_cache);
4856
4857         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4858                                                 cifs_has_mand_locks(cinode)) {
4859                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4860                          inode);
4861                 cinode->oplock = 0;
4862         }
4863
4864         if (inode && S_ISREG(inode->i_mode)) {
4865                 if (CIFS_CACHE_READ(cinode))
4866                         break_lease(inode, O_RDONLY);
4867                 else
4868                         break_lease(inode, O_WRONLY);
4869                 rc = filemap_fdatawrite(inode->i_mapping);
4870                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4871                         rc = filemap_fdatawait(inode->i_mapping);
4872                         mapping_set_error(inode->i_mapping, rc);
4873                         cifs_zap_mapping(inode);
4874                 }
4875                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4876                 if (CIFS_CACHE_WRITE(cinode))
4877                         goto oplock_break_ack;
4878         }
4879
4880         rc = cifs_push_locks(cfile);
4881         if (rc)
4882                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4883
4884 oplock_break_ack:
4885         /*
4886          * When an oplock break is received and there are no active
4887          * file handles, only cached ones, schedule the deferred close
4888          * immediately so that a new open will not reuse the cached handle.
4889          */
4890         spin_lock(&CIFS_I(inode)->deferred_lock);
4891         is_deferred = cifs_is_deferred_close(cfile, &dclose);
4892         spin_unlock(&CIFS_I(inode)->deferred_lock);
4893         if (is_deferred &&
4894             cfile->deferred_close_scheduled &&
4895             delayed_work_pending(&cfile->deferred)) {
4896                 if (cancel_delayed_work(&cfile->deferred)) {
4897                         _cifsFileInfo_put(cfile, false, false);
4898                         goto oplock_break_done;
4899                 }
4900         }
4901         /*
4902          * Releasing a stale oplock after a recent reconnect of the SMB
4903          * session, using a now-incorrect file handle, is not a data
4904          * integrity issue, but do not bother sending an oplock release
4905          * if the session is still disconnected; the server has already released it.
4906          */
4907         if (!cfile->oplock_break_cancelled) {
4908                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4909                                                              cinode);
4910                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4911         }
4912 oplock_break_done:
4913         _cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
4914         cifs_done_oplock_break(cinode);
4915 }
4916
4917 /*
4918  * The presence of cifs_direct_io() in the address space ops vector
4919  * allows open() O_DIRECT flags which would have failed otherwise.
4920  *
4921  * In the non-cached mode (mount with cache=none), we shunt off direct
4922  * read and write requests, so this method should never be called.
4923  *
4924  * Direct IO is not yet supported in the cached mode.
4925  */
4926 static ssize_t
4927 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4928 {
4929         /*
4930          * FIXME
4931          * Eventually need to support direct IO for non forcedirectio mounts
4932          */
4933         return -EINVAL;
4934 }
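
/*
 * For reference, the VFS-side gate that this entry satisfies looks
 * roughly like the following (paraphrased from do_dentry_open() in
 * fs/open.c; shown here only as an illustration):
 *
 *	if (f->f_flags & O_DIRECT) {
 *		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
 *			return -EINVAL;
 *	}
 */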
4935
4936 static int cifs_swap_activate(struct swap_info_struct *sis,
4937                               struct file *swap_file, sector_t *span)
4938 {
4939         struct cifsFileInfo *cfile = swap_file->private_data;
4940         struct inode *inode = swap_file->f_mapping->host;
4941         unsigned long blocks;
4942         long long isize;
4943
4944         cifs_dbg(FYI, "swap activate\n");
4945
4946         spin_lock(&inode->i_lock);
4947         blocks = inode->i_blocks;
4948         isize = inode->i_size;
4949         spin_unlock(&inode->i_lock);
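        /*
         * i_blocks is counted in 512-byte units; if the allocated blocks
         * cover less than i_size, the file must be sparse (it has holes)
         * and cannot safely back swap.
         */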
4950         if (blocks * 512 < isize) {
4951                 pr_warn("swap activate: swapfile has holes\n");
4952                 return -EINVAL;
4953         }
4954         *span = sis->pages;
4955
4956         pr_warn_once("Swap support over SMB3 is experimental\n");
4957
4958         /*
4959          * TODO: consider adding ACL (or documenting how) to prevent other
4960          * users (on this or other systems) from reading it
4961          */
4962
4963
4964         /* TODO: add sk_set_memalloc(inet) or similar */
4965
4966         if (cfile)
4967                 cfile->swapfile = true;
4968         /*
4969          * TODO: Since file already open, we can't open with DENY_ALL here
4970          * but we could add call to grab a byte range lock to prevent others
4971          * from reading or writing the file
4972          */
4973
4974         return 0;
4975 }
4976
4977 static void cifs_swap_deactivate(struct file *file)
4978 {
4979         struct cifsFileInfo *cfile = file->private_data;
4980
4981         cifs_dbg(FYI, "swap deactivate\n");
4982
4983         /* TODO: an undo of sk_set_memalloc(inet) will eventually be needed */
4984
4985         if (cfile)
4986                 cfile->swapfile = false;
4987
4988         /* do we need to unpin (or unlock) the file? */
4989 }
4990
4991 const struct address_space_operations cifs_addr_ops = {
4992         .readpage = cifs_readpage,
4993         .readpages = cifs_readpages,
4994         .writepage = cifs_writepage,
4995         .writepages = cifs_writepages,
4996         .write_begin = cifs_write_begin,
4997         .write_end = cifs_write_end,
4998         .set_page_dirty = __set_page_dirty_nobuffers,
4999         .releasepage = cifs_release_page,
5000         .direct_IO = cifs_direct_io,
5001         .invalidatepage = cifs_invalidate_page,
5002         .launder_page = cifs_launder_page,
5003         /*
5004          * TODO: investigate and, if useful, add a cifs_migratePage
5005          * helper (under CONFIG_MIGRATION) in the future, and also
5006          * investigate and add an is_dirty_writeback helper if needed
5007          */
5008         .swap_activate = cifs_swap_activate,
5009         .swap_deactivate = cifs_swap_deactivate,
5010 };
5011
5012 /*
5013  * cifs_readpages requires the server to support a buffer large enough to
5014  * contain the header plus one complete page of data.  Otherwise, we need
5015  * to leave cifs_readpages out of the address space operations.
5016  */
5017 const struct address_space_operations cifs_addr_ops_smallbuf = {
5018         .readpage = cifs_readpage,
5019         .writepage = cifs_writepage,
5020         .writepages = cifs_writepages,
5021         .write_begin = cifs_write_begin,
5022         .write_end = cifs_write_end,
5023         .set_page_dirty = __set_page_dirty_nobuffers,
5024         .releasepage = cifs_release_page,
5025         .invalidatepage = cifs_invalidate_page,
5026         .launder_page = cifs_launder_page,
5027 };
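
/*
 * For illustration, the choice between the two tables is made when the
 * inode's address space ops are assigned (see cifs_set_ops() in
 * inode.c); the check there is along these lines:
 *
 *	if (cifs_sb_master_tcon(cifs_sb)->ses->server->maxBuf <
 *	    PAGE_SIZE + MAX_CIFS_HDR_SIZE)
 *		inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
 *	else
 *		inode->i_data.a_ops = &cifs_addr_ops;
 */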