fs/cifs/file.c
// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French ([email protected])
 *              Jeremy Allison ([email protected])
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"

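/*
 * Map the O_ACCMODE portion of the open flags to the NT/SMB desired
 * access mask requested on open.
 */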
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause an unnecessary access denied on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

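/* Translate VFS open flags into the SMB_O_* flags used by the POSIX open call. */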
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

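/*
 * Derive the SMB create disposition from the O_CREAT/O_EXCL/O_TRUNC
 * combination (see the mapping table in cifs_nt_open() below).
 */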
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

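/*
 * Open a file using the SMB POSIX extensions and, if requested, fill in
 * or refresh the corresponding inode from the returned metadata.
 */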
int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

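/* Open via the regular NT/SMB create path (used when POSIX opens are unavailable or fail). */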
static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no POSIX flag combination that maps directly
 *      to the disposition FILE_SUPERSEDE (i.e. create whether or not the
 *      file exists). O_CREAT | O_TRUNC is similar, but it truncates an
 *      existing file rather than recreating it as FILE_SUPERSEDE does
 *      (using the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag, and
 *      the read/write flags match reasonably.  O_LARGEFILE is irrelevant
 *      because largefile support is always used by this client.  Flags
 *      O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC, O_NOFOLLOW and
 *      O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        /* TODO: Add support for calling posix query info but with passing in fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

out:
        kfree(buf);
        return rc;
}

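/* Return true if any open instance of this inode holds byte-range locks. */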
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

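/*
 * Take lock_sem for write by polling down_write_trylock() with short
 * sleeps instead of blocking in down_write(), presumably to avoid
 * lock_sem deadlocks when a reconnect occurs while readers hold it.
 */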
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

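/*
 * Allocate and initialize the cifsFileInfo for a freshly opened handle,
 * link it into the tcon and inode open-file lists, and apply the oplock
 * (or any oplock that arrived while the open was pending).
 */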
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

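/* Take an additional reference; the caller must already hold one. */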
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:    if true, queue the final release to a worker thread
 *              (must be false on close and oplock break paths)
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

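/*
 * ->open() handler for regular files on a cifs mount: try a cached
 * deferred-close handle first, then a POSIX open where supported,
 * then fall back to a regular NT open.
 */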
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto use_cache;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through and retry the open the old way on
                 * network i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

use_cache:
        fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
                           file->f_mode & FMODE_WRITE);
        if (file->f_flags & O_DIRECT &&
            (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
             file->f_flags & O_APPEND))
                cifs_invalidate_cache(file_inode(file),
                                      FSCACHE_INVAL_DIO_WRITE);

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

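/*
 * Reopen a handle that was invalidated, e.g. by reconnect or durable
 * handle timeout; if can_flush is set, also flush dirty pages and
 * refresh the inode info from the server.
 */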
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * We can not grab the rename sem here because various ops, including
         * those that already hold it, can end up causing writepage to get
         * called, and if the server was down that means we end up here; we
         * can never tell if the caller already holds the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * in the reconnect path especially it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data, and since we do not know if we
         * have data that would invalidate the current end of file on the
         * server we can not go to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

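/*
 * Delayed-work handler that completes a previously deferred close:
 * remove the deferred-close record and drop the reference it held.
 */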
void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
                    cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work
                                 * queues new work, so increase the ref count to
                                 * avoid a use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

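/* Allocate and initialize a byte-range lock record for the current task. */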
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;

        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: one of the CIFS_*_OP values above (0 - no op, 1 - read, 2 - write) */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}

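/*
 * Push all cached byte-range locks for this handle to the server, batching
 * them into as many LOCKING_ANDX_RANGE entries as fit in one request.
 */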
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

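/* Fold a lock owner pointer into a 32-bit id, salted with cifs_lock_secret. */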
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

1347 static int
1348 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1349 {
1350         struct inode *inode = d_inode(cfile->dentry);
1351         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1352         struct file_lock *flock;
1353         struct file_lock_context *flctx = inode->i_flctx;
1354         unsigned int count = 0, i;
1355         int rc = 0, xid, type;
1356         struct list_head locks_to_send, *el;
1357         struct lock_to_push *lck, *tmp;
1358         __u64 length;
1359
1360         xid = get_xid();
1361
1362         if (!flctx)
1363                 goto out;
1364
1365         spin_lock(&flctx->flc_lock);
1366         list_for_each(el, &flctx->flc_posix) {
1367                 count++;
1368         }
1369         spin_unlock(&flctx->flc_lock);
1370
1371         INIT_LIST_HEAD(&locks_to_send);
1372
1373         /*
1374          * Allocating count locks is enough because no FL_POSIX locks can be
1375          * added to the list while we are holding cinode->lock_sem that
1376          * protects locking operations of this inode.
1377          */
1378         for (i = 0; i < count; i++) {
1379                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1380                 if (!lck) {
1381                         rc = -ENOMEM;
1382                         goto err_out;
1383                 }
1384                 list_add_tail(&lck->llist, &locks_to_send);
1385         }
1386
1387         el = locks_to_send.next;
1388         spin_lock(&flctx->flc_lock);
1389         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1390                 if (el == &locks_to_send) {
1391                         /*
1392                          * The list ended. We don't have enough allocated
1393                          * structures - something is really wrong.
1394                          */
1395                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1396                         break;
1397                 }
1398                 length = 1 + flock->fl_end - flock->fl_start;
1399                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1400                         type = CIFS_RDLCK;
1401                 else
1402                         type = CIFS_WRLCK;
1403                 lck = list_entry(el, struct lock_to_push, llist);
1404                 lck->pid = hash_lockowner(flock->fl_owner);
1405                 lck->netfid = cfile->fid.netfid;
1406                 lck->length = length;
1407                 lck->type = type;
1408                 lck->offset = flock->fl_start;
1409         }
1410         spin_unlock(&flctx->flc_lock);
1411
1412         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1413                 int stored_rc;
1414
1415                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1416                                              lck->offset, lck->length, NULL,
1417                                              lck->type, 0);
1418                 if (stored_rc)
1419                         rc = stored_rc;
1420                 list_del(&lck->llist);
1421                 kfree(lck);
1422         }
1423
1424 out:
1425         free_xid(xid);
1426         return rc;
1427 err_out:
1428         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1429                 list_del(&lck->llist);
1430                 kfree(lck);
1431         }
1432         goto out;
1433 }
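
/*
 * A note on the shape of cifs_push_posix_locks() above: the list length is
 * sampled under flc_lock, the nodes are allocated with GFP_KERNEL outside it
 * (sleeping allocations are not allowed under a spinlock), and the entries
 * are filled in on a second locked pass. Reduced to a sketch with
 * hypothetical names - safe here only because lock_sem keeps the lock list
 * from growing between the two passes:
 *
 *	spin_lock(&src_lock);			// pass 1: count only
 *	list_for_each(el, &src_list)
 *		count++;
 *	spin_unlock(&src_lock);
 *
 *	for (i = 0; i < count; i++)		// may sleep, no lock held
 *		preallocate_node(&staging_list);
 *
 *	spin_lock(&src_lock);			// pass 2: copy into nodes
 *	list_for_each_entry(item, &src_list, list)
 *		fill_next_node(&staging_list, item);
 *	spin_unlock(&src_lock);
 */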
1434
1435 static int
1436 cifs_push_locks(struct cifsFileInfo *cfile)
1437 {
1438         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1439         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1440         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1441         int rc = 0;
1442
1443         /* we are going to update can_cache_brlcks here - need write access */
1444         cifs_down_write(&cinode->lock_sem);
1445         if (!cinode->can_cache_brlcks) {
1446                 up_write(&cinode->lock_sem);
1447                 return rc;
1448         }
1449
1450         if (cap_unix(tcon->ses) &&
1451             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1452             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1453                 rc = cifs_push_posix_locks(cfile);
1454         else
1455                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1456
1457         cinode->can_cache_brlcks = false;
1458         up_write(&cinode->lock_sem);
1459         return rc;
1460 }
1461
1462 static void
1463 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1464                 bool *wait_flag, struct TCP_Server_Info *server)
1465 {
1466         if (flock->fl_flags & FL_POSIX)
1467                 cifs_dbg(FYI, "Posix\n");
1468         if (flock->fl_flags & FL_FLOCK)
1469                 cifs_dbg(FYI, "Flock\n");
1470         if (flock->fl_flags & FL_SLEEP) {
1471                 cifs_dbg(FYI, "Blocking lock\n");
1472                 *wait_flag = true;
1473         }
1474         if (flock->fl_flags & FL_ACCESS)
1475                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1476         if (flock->fl_flags & FL_LEASE)
1477                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1478         if (flock->fl_flags &
1479             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1480                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1481                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1482
1483         *type = server->vals->large_lock_type;
1484         if (flock->fl_type == F_WRLCK) {
1485                 cifs_dbg(FYI, "F_WRLCK\n");
1486                 *type |= server->vals->exclusive_lock_type;
1487                 *lock = 1;
1488         } else if (flock->fl_type == F_UNLCK) {
1489                 cifs_dbg(FYI, "F_UNLCK\n");
1490                 *type |= server->vals->unlock_lock_type;
1491                 *unlock = 1;
1492                 /* Check if unlock includes more than one lock range */
1493         } else if (flock->fl_type == F_RDLCK) {
1494                 cifs_dbg(FYI, "F_RDLCK\n");
1495                 *type |= server->vals->shared_lock_type;
1496                 *lock = 1;
1497         } else if (flock->fl_type == F_EXLCK) {
1498                 cifs_dbg(FYI, "F_EXLCK\n");
1499                 *type |= server->vals->exclusive_lock_type;
1500                 *lock = 1;
1501         } else if (flock->fl_type == F_SHLCK) {
1502                 cifs_dbg(FYI, "F_SHLCK\n");
1503                 *type |= server->vals->shared_lock_type;
1504                 *lock = 1;
1505         } else
1506                 cifs_dbg(FYI, "Unknown type of lock\n");
1507 }
1508
1509 static int
1510 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1511            bool wait_flag, bool posix_lck, unsigned int xid)
1512 {
1513         int rc = 0;
1514         __u64 length = 1 + flock->fl_end - flock->fl_start;
1515         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1516         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1517         struct TCP_Server_Info *server = tcon->ses->server;
1518         __u16 netfid = cfile->fid.netfid;
1519
1520         if (posix_lck) {
1521                 int posix_lock_type;
1522
1523                 rc = cifs_posix_lock_test(file, flock);
1524                 if (!rc)
1525                         return rc;
1526
1527                 if (type & server->vals->shared_lock_type)
1528                         posix_lock_type = CIFS_RDLCK;
1529                 else
1530                         posix_lock_type = CIFS_WRLCK;
1531                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1532                                       hash_lockowner(flock->fl_owner),
1533                                       flock->fl_start, length, flock,
1534                                       posix_lock_type, wait_flag);
1535                 return rc;
1536         }
1537
1538         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1539         if (!rc)
1540                 return rc;
1541
1542         /* BB we could chain these into one lock request BB */
1543         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1544                                     1, 0, false);
1545         if (rc == 0) {
1546                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1547                                             type, 0, 1, false);
1548                 flock->fl_type = F_UNLCK;
1549                 if (rc != 0)
1550                         cifs_dbg(VFS, "Error %d unlocking previously locked range during lock test\n",
1551                                  rc);
1552                 return 0;
1553         }
1554
1555         if (type & server->vals->shared_lock_type) {
1556                 flock->fl_type = F_WRLCK;
1557                 return 0;
1558         }
1559
1560         type &= ~server->vals->exclusive_lock_type;
1561
1562         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1563                                     type | server->vals->shared_lock_type,
1564                                     1, 0, false);
1565         if (rc == 0) {
1566                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1567                         type | server->vals->shared_lock_type, 0, 1, false);
1568                 flock->fl_type = F_RDLCK;
1569                 if (rc != 0)
1570                         cifs_dbg(VFS, "Error %d unlocking previously locked range during lock test\n",
1571                                  rc);
1572         } else
1573                 flock->fl_type = F_WRLCK;
1574
1575         return 0;
1576 }
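
/*
 * cifs_getlk() backs fcntl(F_GETLK). On the non-POSIX path the range is
 * tested by briefly taking and dropping the lock on the server; if the
 * range is free, fl_type is rewritten to F_UNLCK, which is what userspace
 * sees. A minimal caller-side sketch, assuming a file on a CIFS mount at
 * the hypothetical path /mnt/cifs/demo:
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/mnt/cifs/demo", O_RDWR);
 *		struct flock fl = {
 *			.l_type = F_WRLCK,	// would a write lock succeed?
 *			.l_whence = SEEK_SET,
 *			.l_start = 0,
 *			.l_len = 4096,
 *		};
 *
 *		if (fd < 0)
 *			return 1;
 *		if (fcntl(fd, F_GETLK, &fl) == 0 && fl.l_type == F_UNLCK)
 *			printf("range is free\n");
 *		else
 *			printf("conflict, e.g. lock held by pid %d\n",
 *			       (int)fl.l_pid);
 *		close(fd);
 *		return 0;
 *	}
 */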
1577
1578 void
1579 cifs_move_llist(struct list_head *source, struct list_head *dest)
1580 {
1581         struct list_head *li, *tmp;
1582         list_for_each_safe(li, tmp, source)
1583                 list_move(li, dest);
1584 }
1585
1586 void
1587 cifs_free_llist(struct list_head *llist)
1588 {
1589         struct cifsLockInfo *li, *tmp;
1590         list_for_each_entry_safe(li, tmp, llist, llist) {
1591                 cifs_del_lock_waiters(li);
1592                 list_del(&li->llist);
1593                 kfree(li);
1594         }
1595 }
1596
1597 int
1598 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1599                   unsigned int xid)
1600 {
1601         int rc = 0, stored_rc;
1602         static const int types[] = {
1603                 LOCKING_ANDX_LARGE_FILES,
1604                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1605         };
1606         unsigned int i;
1607         unsigned int max_num, num, max_buf;
1608         LOCKING_ANDX_RANGE *buf, *cur;
1609         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1610         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1611         struct cifsLockInfo *li, *tmp;
1612         __u64 length = 1 + flock->fl_end - flock->fl_start;
1613         struct list_head tmp_llist;
1614
1615         INIT_LIST_HEAD(&tmp_llist);
1616
1617         /*
1618          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1619          * and check it before using.
1620          */
1621         max_buf = tcon->ses->server->maxBuf;
1622         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1623                 return -EINVAL;
1624
1625         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1626                      PAGE_SIZE);
1627         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1628                         PAGE_SIZE);
1629         max_num = (max_buf - sizeof(struct smb_hdr)) /
1630                                                 sizeof(LOCKING_ANDX_RANGE);
1631         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1632         if (!buf)
1633                 return -ENOMEM;
1634
1635         cifs_down_write(&cinode->lock_sem);
1636         for (i = 0; i < 2; i++) {
1637                 cur = buf;
1638                 num = 0;
1639                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1640                         if (flock->fl_start > li->offset ||
1641                             (flock->fl_start + length) <
1642                             (li->offset + li->length))
1643                                 continue;
1644                         if (current->tgid != li->pid)
1645                                 continue;
1646                         if (types[i] != li->type)
1647                                 continue;
1648                         if (cinode->can_cache_brlcks) {
1649                                 /*
1650                                  * We can cache brlock requests - simply remove
1651                                  * a lock from the file's list.
1652                                  */
1653                                 list_del(&li->llist);
1654                                 cifs_del_lock_waiters(li);
1655                                 kfree(li);
1656                                 continue;
1657                         }
1658                         cur->Pid = cpu_to_le16(li->pid);
1659                         cur->LengthLow = cpu_to_le32((u32)li->length);
1660                         cur->LengthHigh = cpu_to_le32((u32)(li->length >> 32));
1661                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1662                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset >> 32));
1663                         /*
1664                          * We need to save a lock here to let us add it again to
1665                          * the file's list if the unlock range request fails on
1666                          * the server.
1667                          */
1668                         list_move(&li->llist, &tmp_llist);
1669                         if (++num == max_num) {
1670                                 stored_rc = cifs_lockv(xid, tcon,
1671                                                        cfile->fid.netfid,
1672                                                        li->type, num, 0, buf);
1673                                 if (stored_rc) {
1674                                         /*
1675                                          * We failed on the unlock range
1676                                          * request - add all locks from the tmp
1677                                          * list to the head of the file's list.
1678                                          */
1679                                         cifs_move_llist(&tmp_llist,
1680                                                         &cfile->llist->locks);
1681                                         rc = stored_rc;
1682                                 } else
1683                                         /*
1684                                          * The unlock range request succeeded -
1685                                          * free the tmp list.
1686                                          */
1687                                         cifs_free_llist(&tmp_llist);
1688                                 cur = buf;
1689                                 num = 0;
1690                         } else
1691                                 cur++;
1692                 }
1693                 if (num) {
1694                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1695                                                types[i], num, 0, buf);
1696                         if (stored_rc) {
1697                                 cifs_move_llist(&tmp_llist,
1698                                                 &cfile->llist->locks);
1699                                 rc = stored_rc;
1700                         } else
1701                                 cifs_free_llist(&tmp_llist);
1702                 }
1703         }
1704
1705         up_write(&cinode->lock_sem);
1706         kfree(buf);
1707         return rc;
1708 }
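
/*
 * To make the batching arithmetic above concrete: assuming a 4096-byte
 * PAGE_SIZE and, illustratively, a 32-byte SMB header and a 20-byte
 * LOCKING_ANDX_RANGE (Pid and Pad plus the four 32-bit offset/length
 * words), a server with a large maxBuf gives
 *
 *	max_buf = min(maxBuf - 32, 4096)  = 4096
 *	max_num = (4096 - 32) / 20        = 203 ranges per request
 *
 * so even a large set of cached byte-range locks is torn down in a handful
 * of round trips rather than one request per lock.
 */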
1709
1710 static int
1711 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1712            bool wait_flag, bool posix_lck, int lock, int unlock,
1713            unsigned int xid)
1714 {
1715         int rc = 0;
1716         __u64 length = 1 + flock->fl_end - flock->fl_start;
1717         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1718         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1719         struct TCP_Server_Info *server = tcon->ses->server;
1720         struct inode *inode = d_inode(cfile->dentry);
1721
1722         if (posix_lck) {
1723                 int posix_lock_type;
1724
1725                 rc = cifs_posix_lock_set(file, flock);
1726                 if (rc <= FILE_LOCK_DEFERRED)
1727                         return rc;
1728
1729                 if (type & server->vals->shared_lock_type)
1730                         posix_lock_type = CIFS_RDLCK;
1731                 else
1732                         posix_lock_type = CIFS_WRLCK;
1733
1734                 if (unlock == 1)
1735                         posix_lock_type = CIFS_UNLCK;
1736
1737                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1738                                       hash_lockowner(flock->fl_owner),
1739                                       flock->fl_start, length,
1740                                       NULL, posix_lock_type, wait_flag);
1741                 goto out;
1742         }
1743
1744         if (lock) {
1745                 struct cifsLockInfo *lock;
1746
1747                 lock = cifs_lock_init(flock->fl_start, length, type,
1748                                       flock->fl_flags);
1749                 if (!lock)
1750                         return -ENOMEM;
1751
1752                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1753                 if (rc < 0) {
1754                         kfree(lock);
1755                         return rc;
1756                 }
1757                 if (!rc)
1758                         goto out;
1759
1760                 /*
1761                  * Windows 7 server can delay breaking lease from read to None
1762                  * if we set a byte-range lock on a file - break it explicitly
1763                  * before sending the lock to the server to be sure the next
1764                  * read won't conflict with non-overlapping locks due to
1765                  * page reads.
1766                  */
1767                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1768                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1769                         cifs_zap_mapping(inode);
1770                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1771                                  inode);
1772                         CIFS_I(inode)->oplock = 0;
1773                 }
1774
1775                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1776                                             type, 1, 0, wait_flag);
1777                 if (rc) {
1778                         kfree(lock);
1779                         return rc;
1780                 }
1781
1782                 cifs_lock_add(cfile, lock);
1783         } else if (unlock)
1784                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1785
1786 out:
1787         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1788                 /*
1789                  * If this is a request to remove all locks because we
1790                  * are closing the file, it doesn't matter if the
1791                  * unlocking failed as both cifs.ko and the SMB server
1792                  * remove the lock on file close
1793                  */
1794                 if (rc) {
1795                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1796                         if (!(flock->fl_flags & FL_CLOSE))
1797                                 return rc;
1798                 }
1799                 rc = locks_lock_file_wait(file, flock);
1800         }
1801         return rc;
1802 }
1803
1804 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1805 {
1806         int rc, xid;
1807         int lock = 0, unlock = 0;
1808         bool wait_flag = false;
1809         bool posix_lck = false;
1810         struct cifs_sb_info *cifs_sb;
1811         struct cifs_tcon *tcon;
1812         struct cifsFileInfo *cfile;
1813         __u32 type;
1814
1815         rc = -EACCES;
1816         xid = get_xid();
1817
1818         if (!(fl->fl_flags & FL_FLOCK))
1819                 return -ENOLCK;
1820
1821         cfile = (struct cifsFileInfo *)file->private_data;
1822         tcon = tlink_tcon(cfile->tlink);
1823
1824         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1825                         tcon->ses->server);
1826         cifs_sb = CIFS_FILE_SB(file);
1827
1828         if (cap_unix(tcon->ses) &&
1829             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1830             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1831                 posix_lck = true;
1832
1833         if (!lock && !unlock) {
1834                 /*
1835                  * if neither a lock nor an unlock was requested, there is
1836                  * nothing to do since we cannot tell what the caller wants
1837                  */
1838                 free_xid(xid);
1839                 return -EOPNOTSUPP;
1840         }
1841
1842         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1843                         xid);
1844         free_xid(xid);
1845         return rc;
1848 }
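
/*
 * cifs_flock() is the flock(2) entry point: LOCK_SH arrives as F_RDLCK,
 * LOCK_EX as F_WRLCK, and omitting LOCK_NB leaves FL_SLEEP set so wait_flag
 * becomes true above. A minimal caller-side sketch (hypothetical mount
 * path, return values unchecked for brevity):
 *
 *	#include <sys/file.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/mnt/cifs/demo", O_RDWR);
 *
 *		if (fd < 0)
 *			return 1;
 *		flock(fd, LOCK_EX);	// whole-file exclusive lock, may block
 *		// ... critical section ...
 *		flock(fd, LOCK_UN);
 *		close(fd);
 *		return 0;
 *	}
 */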
1849
1850 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1851 {
1852         int rc, xid;
1853         int lock = 0, unlock = 0;
1854         bool wait_flag = false;
1855         bool posix_lck = false;
1856         struct cifs_sb_info *cifs_sb;
1857         struct cifs_tcon *tcon;
1858         struct cifsFileInfo *cfile;
1859         __u32 type;
1860
1861         rc = -EACCES;
1862         xid = get_xid();
1863
1864         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1865                  cmd, flock->fl_flags, flock->fl_type,
1866                  flock->fl_start, flock->fl_end);
1867
1868         cfile = (struct cifsFileInfo *)file->private_data;
1869         tcon = tlink_tcon(cfile->tlink);
1870
1871         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1872                         tcon->ses->server);
1873         cifs_sb = CIFS_FILE_SB(file);
1874         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1875
1876         if (cap_unix(tcon->ses) &&
1877             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1878             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1879                 posix_lck = true;
1880         /*
1881          * BB add code here to normalize offset and length to account for
1882          * negative length which we can not accept over the wire.
1883          */
1884         if (IS_GETLK(cmd)) {
1885                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1886                 free_xid(xid);
1887                 return rc;
1888         }
1889
1890         if (!lock && !unlock) {
1891                 /*
1892                  * if neither a lock nor an unlock was requested, there is
1893                  * nothing to do since we cannot tell what the caller wants
1894                  */
1895                 free_xid(xid);
1896                 return -EOPNOTSUPP;
1897         }
1898
1899         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1900                         xid);
1901         free_xid(xid);
1902         return rc;
1903 }
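
/*
 * cifs_lock() handles fcntl(2) byte-range locks: F_GETLK is routed to
 * cifs_getlk() above, while F_SETLK/F_SETLKW fall through to cifs_setlk(),
 * with F_SETLKW's willingness to block arriving as FL_SLEEP. A minimal
 * caller-side sketch (hypothetical mount path):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/mnt/cifs/demo", O_RDWR);
 *		struct flock fl = {
 *			.l_type = F_WRLCK,
 *			.l_whence = SEEK_SET,
 *			.l_start = 0,
 *			.l_len = 0,		// 0 means "to end of file"
 *		};
 *
 *		if (fd < 0)
 *			return 1;
 *		fcntl(fd, F_SETLKW, &fl);	// blocking acquire
 *		// ... exclusive access to the locked range ...
 *		fl.l_type = F_UNLCK;
 *		fcntl(fd, F_SETLK, &fl);
 *		close(fd);
 *		return 0;
 *	}
 */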
1904
1905 /*
1906  * Update the file size (if needed) after a write. Must be called with
1907  * inode->i_lock held.
1908  */
1909 void
1910 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1911                       unsigned int bytes_written)
1912 {
1913         loff_t end_of_write = offset + bytes_written;
1914
1915         if (end_of_write > cifsi->server_eof)
1916                 cifsi->server_eof = end_of_write;
1917 }
1918
1919 static ssize_t
1920 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1921            size_t write_size, loff_t *offset)
1922 {
1923         int rc = 0;
1924         unsigned int bytes_written = 0;
1925         unsigned int total_written;
1926         struct cifs_tcon *tcon;
1927         struct TCP_Server_Info *server;
1928         unsigned int xid;
1929         struct dentry *dentry = open_file->dentry;
1930         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1931         struct cifs_io_parms io_parms = {0};
1932
1933         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1934                  write_size, *offset, dentry);
1935
1936         tcon = tlink_tcon(open_file->tlink);
1937         server = tcon->ses->server;
1938
1939         if (!server->ops->sync_write)
1940                 return -ENOSYS;
1941
1942         xid = get_xid();
1943
1944         for (total_written = 0; write_size > total_written;
1945              total_written += bytes_written) {
1946                 rc = -EAGAIN;
1947                 while (rc == -EAGAIN) {
1948                         struct kvec iov[2];
1949                         unsigned int len;
1950
1951                         if (open_file->invalidHandle) {
1952                                 /* we could deadlock if we called
1953                                    filemap_fdatawait from here so tell
1954                                    reopen_file not to flush data to
1955                                    server now */
1956                                 rc = cifs_reopen_file(open_file, false);
1957                                 if (rc != 0)
1958                                         break;
1959                         }
1960
1961                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1962                                   (unsigned int)write_size - total_written);
1963                         /* iov[0] is reserved for smb header */
1964                         iov[1].iov_base = (char *)write_data + total_written;
1965                         iov[1].iov_len = len;
1966                         io_parms.pid = pid;
1967                         io_parms.tcon = tcon;
1968                         io_parms.offset = *offset;
1969                         io_parms.length = len;
1970                         rc = server->ops->sync_write(xid, &open_file->fid,
1971                                         &io_parms, &bytes_written, iov, 1);
1972                 }
1973                 if (rc || (bytes_written == 0)) {
1974                         if (total_written)
1975                                 break;
1976                         else {
1977                                 free_xid(xid);
1978                                 return rc;
1979                         }
1980                 } else {
1981                         spin_lock(&d_inode(dentry)->i_lock);
1982                         cifs_update_eof(cifsi, *offset, bytes_written);
1983                         spin_unlock(&d_inode(dentry)->i_lock);
1984                         *offset += bytes_written;
1985                 }
1986         }
1987
1988         cifs_stats_bytes_written(tcon, total_written);
1989
1990         if (total_written > 0) {
1991                 spin_lock(&d_inode(dentry)->i_lock);
1992                 if (*offset > d_inode(dentry)->i_size) {
1993                         i_size_write(d_inode(dentry), *offset);
1994                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
1995                 }
1996                 spin_unlock(&d_inode(dentry)->i_lock);
1997         }
1998         mark_inode_dirty_sync(d_inode(dentry));
1999         free_xid(xid);
2000         return total_written;
2001 }
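
/*
 * The i_blocks update above rounds up to 512-byte units: adding (512 - 1)
 * before the shift makes any partial block count as a whole one. For
 * example, an end offset of 1000 bytes gives (511 + 1000) >> 9 = 2 blocks,
 * and an exact 1024 bytes gives (511 + 1024) >> 9 = 2 as well.
 */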
2002
2003 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2004                                         bool fsuid_only)
2005 {
2006         struct cifsFileInfo *open_file = NULL;
2007         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2008
2009         /* only filter by fsuid on multiuser mounts */
2010         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2011                 fsuid_only = false;
2012
2013         spin_lock(&cifs_inode->open_file_lock);
2014         /* we could simply take the first list entry since write-only entries
2015            are always at the end of the list, but since the first entry might
2016            have a close pending, we go through the whole list */
2017         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2018                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2019                         continue;
2020                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2021                         if (!open_file->invalidHandle) {
2022                                 /* found a good file */
2023                                 /* lock it so it will not be closed on us */
2024                                 cifsFileInfo_get(open_file);
2025                                 spin_unlock(&cifs_inode->open_file_lock);
2026                                 return open_file;
2027                         } /* else might as well continue, and look for
2028                              another, or simply have the caller reopen it
2029                              again rather than trying to fix this handle */
2030                 } else /* write only file */
2031                         break; /* write only files are last so must be done */
2032         }
2033         spin_unlock(&cifs_inode->open_file_lock);
2034         return NULL;
2035 }
2036
2037 /* Return -EBADF if no handle is found and general rc otherwise */
2038 int
2039 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2040                        struct cifsFileInfo **ret_file)
2041 {
2042         struct cifsFileInfo *open_file, *inv_file = NULL;
2043         struct cifs_sb_info *cifs_sb;
2044         bool any_available = false;
2045         int rc = -EBADF;
2046         unsigned int refind = 0;
2047         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2048         bool with_delete = flags & FIND_WR_WITH_DELETE;
2049         *ret_file = NULL;
2050
2051         /*
2052          * Having a null inode here (because mapping->host was set to zero by
2053          * the VFS or MM) should not happen but we had reports of an oops (due
2054          * to it being zero) during stress testcases so we need to check for it
2055          */
2056
2057         if (cifs_inode == NULL) {
2058                 cifs_dbg(VFS, "Null inode passed to %s\n", __func__);
2059                 dump_stack();
2060                 return rc;
2061         }
2062
2063         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2064
2065         /* only filter by fsuid on multiuser mounts */
2066         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2067                 fsuid_only = false;
2068
2069         spin_lock(&cifs_inode->open_file_lock);
2070 refind_writable:
2071         if (refind > MAX_REOPEN_ATT) {
2072                 spin_unlock(&cifs_inode->open_file_lock);
2073                 return rc;
2074         }
2075         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2076                 if (!any_available && open_file->pid != current->tgid)
2077                         continue;
2078                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2079                         continue;
2080                 if (with_delete && !(open_file->fid.access & DELETE))
2081                         continue;
2082                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2083                         if (!open_file->invalidHandle) {
2084                                 /* found a good writable file */
2085                                 cifsFileInfo_get(open_file);
2086                                 spin_unlock(&cifs_inode->open_file_lock);
2087                                 *ret_file = open_file;
2088                                 return 0;
2089                         } else {
2090                                 if (!inv_file)
2091                                         inv_file = open_file;
2092                         }
2093                 }
2094         }
2095         /* couldn't find a usable FH with the same pid, try any available */
2096         if (!any_available) {
2097                 any_available = true;
2098                 goto refind_writable;
2099         }
2100
2101         if (inv_file) {
2102                 any_available = false;
2103                 cifsFileInfo_get(inv_file);
2104         }
2105
2106         spin_unlock(&cifs_inode->open_file_lock);
2107
2108         if (inv_file) {
2109                 rc = cifs_reopen_file(inv_file, false);
2110                 if (!rc) {
2111                         *ret_file = inv_file;
2112                         return 0;
2113                 }
2114
2115                 spin_lock(&cifs_inode->open_file_lock);
2116                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2117                 spin_unlock(&cifs_inode->open_file_lock);
2118                 cifsFileInfo_put(inv_file);
2119                 ++refind;
2120                 inv_file = NULL;
2121                 spin_lock(&cifs_inode->open_file_lock);
2122                 goto refind_writable;
2123         }
2124
2125         return rc;
2126 }
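
/*
 * The search above is tiered: first a valid handle opened by the caller's
 * tgid, then any valid handle, and only then an invalidated handle that we
 * try to reopen, with MAX_REOPEN_ATT bounding the reopen attempts. The
 * fallback loop, reduced to a sketch with hypothetical helpers:
 *
 *	while (attempts++ <= MAX_REOPEN_ATT) {
 *		h = find_valid_writable_handle();	// fast path
 *		if (h)
 *			return h;
 *		h = pick_invalidated_handle();
 *		if (!h)
 *			return NULL;
 *		if (reopen_handle(h) == 0)		// slow path
 *			return h;
 *		demote_to_list_tail(h);			// then rescan
 *	}
 *	return NULL;
 */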
2127
2128 struct cifsFileInfo *
2129 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2130 {
2131         struct cifsFileInfo *cfile;
2132         int rc;
2133
2134         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2135         if (rc)
2136                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2137
2138         return cfile;
2139 }
2140
2141 int
2142 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2143                        int flags,
2144                        struct cifsFileInfo **ret_file)
2145 {
2146         struct cifsFileInfo *cfile;
2147         void *page = alloc_dentry_path();
2148
2149         *ret_file = NULL;
2150
2151         spin_lock(&tcon->open_file_lock);
2152         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2153                 struct cifsInodeInfo *cinode;
2154                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2155                 if (IS_ERR(full_path)) {
2156                         spin_unlock(&tcon->open_file_lock);
2157                         free_dentry_path(page);
2158                         return PTR_ERR(full_path);
2159                 }
2160                 if (strcmp(full_path, name))
2161                         continue;
2162
2163                 cinode = CIFS_I(d_inode(cfile->dentry));
2164                 spin_unlock(&tcon->open_file_lock);
2165                 free_dentry_path(page);
2166                 return cifs_get_writable_file(cinode, flags, ret_file);
2167         }
2168
2169         spin_unlock(&tcon->open_file_lock);
2170         free_dentry_path(page);
2171         return -ENOENT;
2172 }
2173
2174 int
2175 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2176                        struct cifsFileInfo **ret_file)
2177 {
2178         struct cifsFileInfo *cfile;
2179         void *page = alloc_dentry_path();
2180
2181         *ret_file = NULL;
2182
2183         spin_lock(&tcon->open_file_lock);
2184         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2185                 struct cifsInodeInfo *cinode;
2186                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2187                 if (IS_ERR(full_path)) {
2188                         spin_unlock(&tcon->open_file_lock);
2189                         free_dentry_path(page);
2190                         return PTR_ERR(full_path);
2191                 }
2192                 if (strcmp(full_path, name))
2193                         continue;
2194
2195                 cinode = CIFS_I(d_inode(cfile->dentry));
2196                 spin_unlock(&tcon->open_file_lock);
2197                 free_dentry_path(page);
2198                 *ret_file = find_readable_file(cinode, 0);
2199                 return *ret_file ? 0 : -ENOENT;
2200         }
2201
2202         spin_unlock(&tcon->open_file_lock);
2203         free_dentry_path(page);
2204         return -ENOENT;
2205 }
2206
2207 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2208 {
2209         struct address_space *mapping = page->mapping;
2210         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2211         char *write_data;
2212         int rc = -EFAULT;
2213         int bytes_written = 0;
2214         struct inode *inode;
2215         struct cifsFileInfo *open_file;
2216
2217         if (!mapping || !mapping->host)
2218                 return -EFAULT;
2219
2220         inode = page->mapping->host;
2221
2222         offset += (loff_t)from;
2223         write_data = kmap(page);
2224         write_data += from;
2225
2226         if ((to > PAGE_SIZE) || (from > to)) {
2227                 kunmap(page);
2228                 return -EIO;
2229         }
2230
2231         /* racing with truncate? */
2232         if (offset > mapping->host->i_size) {
2233                 kunmap(page);
2234                 return 0; /* don't care */
2235         }
2236
2237         /* check to make sure that we are not extending the file */
2238         if (mapping->host->i_size - offset < (loff_t)to)
2239                 to = (unsigned)(mapping->host->i_size - offset);
2240
2241         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2242                                     &open_file);
2243         if (!rc) {
2244                 bytes_written = cifs_write(open_file, open_file->pid,
2245                                            write_data, to - from, &offset);
2246                 cifsFileInfo_put(open_file);
2247                 /* Does mm or vfs already set times? */
2248                 inode->i_atime = inode->i_mtime = current_time(inode);
2249                 if ((bytes_written > 0) && (offset))
2250                         rc = 0;
2251                 else if (bytes_written < 0)
2252                         rc = bytes_written;
2253                 else
2254                         rc = -EFAULT;
2255         } else {
2256                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2257                 if (!is_retryable_error(rc))
2258                         rc = -EIO;
2259         }
2260
2261         kunmap(page);
2262         return rc;
2263 }
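
/*
 * The kmap()/kunmap() pairing above is the standard pattern for touching a
 * page that may live in highmem: map it, index to the sub-page offset, do
 * the byte-level work, unmap. The skeleton, with a placeholder for the
 * actual work:
 *
 *	char *p = kmap(page);
 *
 *	operate_on_bytes(p + from, to - from);	// bytes [from, to) of the page
 *	kunmap(page);
 */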
2264
2265 static struct cifs_writedata *
2266 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2267                           pgoff_t end, pgoff_t *index,
2268                           unsigned int *found_pages)
2269 {
2270         struct cifs_writedata *wdata;
2271
2272         wdata = cifs_writedata_alloc((unsigned int)tofind,
2273                                      cifs_writev_complete);
2274         if (!wdata)
2275                 return NULL;
2276
2277         *found_pages = find_get_pages_range_tag(mapping, index, end,
2278                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2279         return wdata;
2280 }
2281
2282 static unsigned int
2283 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2284                     struct address_space *mapping,
2285                     struct writeback_control *wbc,
2286                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2287 {
2288         unsigned int nr_pages = 0, i;
2289         struct page *page;
2290
2291         for (i = 0; i < found_pages; i++) {
2292                 page = wdata->pages[i];
2293                 /*
2294                  * At this point we hold neither the i_pages lock nor the
2295                  * page lock: the page may be truncated or invalidated
2296                  * (changing page->mapping to NULL), or even swizzled
2297                  * back from swapper_space to tmpfs file mapping
2298                  */
2299
2300                 if (nr_pages == 0)
2301                         lock_page(page);
2302                 else if (!trylock_page(page))
2303                         break;
2304
2305                 if (unlikely(page->mapping != mapping)) {
2306                         unlock_page(page);
2307                         break;
2308                 }
2309
2310                 if (!wbc->range_cyclic && page->index > end) {
2311                         *done = true;
2312                         unlock_page(page);
2313                         break;
2314                 }
2315
2316                 if (*next && (page->index != *next)) {
2317                         /* Not next consecutive page */
2318                         unlock_page(page);
2319                         break;
2320                 }
2321
2322                 if (wbc->sync_mode != WB_SYNC_NONE)
2323                         wait_on_page_writeback(page);
2324
2325                 if (PageWriteback(page) ||
2326                                 !clear_page_dirty_for_io(page)) {
2327                         unlock_page(page);
2328                         break;
2329                 }
2330
2331                 /*
2332                  * This actually clears the dirty bit in the radix tree.
2333                  * See cifs_writepage() for more commentary.
2334                  */
2335                 set_page_writeback(page);
2336                 if (page_offset(page) >= i_size_read(mapping->host)) {
2337                         *done = true;
2338                         unlock_page(page);
2339                         end_page_writeback(page);
2340                         break;
2341                 }
2342
2343                 wdata->pages[i] = page;
2344                 *next = page->index + 1;
2345                 ++nr_pages;
2346         }
2347
2348         /* reset index to refind any pages skipped */
2349         if (nr_pages == 0)
2350                 *index = wdata->pages[0]->index + 1;
2351
2352         /* put any pages we aren't going to use */
2353         for (i = nr_pages; i < found_pages; i++) {
2354                 put_page(wdata->pages[i]);
2355                 wdata->pages[i] = NULL;
2356         }
2357
2358         return nr_pages;
2359 }
2360
2361 static int
2362 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2363                  struct address_space *mapping, struct writeback_control *wbc)
2364 {
2365         int rc;
2366
2367         wdata->sync_mode = wbc->sync_mode;
2368         wdata->nr_pages = nr_pages;
2369         wdata->offset = page_offset(wdata->pages[0]);
2370         wdata->pagesz = PAGE_SIZE;
2371         wdata->tailsz = min(i_size_read(mapping->host) -
2372                         page_offset(wdata->pages[nr_pages - 1]),
2373                         (loff_t)PAGE_SIZE);
2374         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2375         wdata->pid = wdata->cfile->pid;
2376
2377         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2378         if (rc)
2379                 return rc;
2380
2381         if (wdata->cfile->invalidHandle)
2382                 rc = -EAGAIN;
2383         else
2384                 rc = wdata->server->ops->async_writev(wdata,
2385                                                       cifs_writedata_release);
2386
2387         return rc;
2388 }
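
/*
 * The length math above in numbers: every page but the last contributes a
 * full PAGE_SIZE and the tail contributes only up to EOF. For example, with
 * 4096-byte pages, three dirty pages, and i_size ending 100 bytes into the
 * last page:
 *
 *	tailsz = min(i_size - page_offset(last page), 4096) = 100
 *	bytes  = (3 - 1) * 4096 + 100                       = 8292
 */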
2389
2390 static int cifs_writepages(struct address_space *mapping,
2391                            struct writeback_control *wbc)
2392 {
2393         struct inode *inode = mapping->host;
2394         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2395         struct TCP_Server_Info *server;
2396         bool done = false, scanned = false, range_whole = false;
2397         pgoff_t end, index;
2398         struct cifs_writedata *wdata;
2399         struct cifsFileInfo *cfile = NULL;
2400         int rc = 0;
2401         int saved_rc = 0;
2402         unsigned int xid;
2403
2404         /*
2405          * If wsize is smaller than the page size, default to writing
2406          * one page at a time via cifs_writepage
2407          */
2408         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2409                 return generic_writepages(mapping, wbc);
2410
2411         xid = get_xid();
2412         if (wbc->range_cyclic) {
2413                 index = mapping->writeback_index; /* Start from prev offset */
2414                 end = -1;
2415         } else {
2416                 index = wbc->range_start >> PAGE_SHIFT;
2417                 end = wbc->range_end >> PAGE_SHIFT;
2418                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2419                         range_whole = true;
2420                 scanned = true;
2421         }
2422         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2423
2424 retry:
2425         while (!done && index <= end) {
2426                 unsigned int i, nr_pages, found_pages, wsize;
2427                 pgoff_t next = 0, tofind, saved_index = index;
2428                 struct cifs_credits credits_on_stack;
2429                 struct cifs_credits *credits = &credits_on_stack;
2430                 int get_file_rc = 0;
2431
2432                 if (cfile)
2433                         cifsFileInfo_put(cfile);
2434
2435                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2436
2437                 /* in case of an error store it to return later */
2438                 if (rc)
2439                         get_file_rc = rc;
2440
2441                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2442                                                    &wsize, credits);
2443                 if (rc != 0) {
2444                         done = true;
2445                         break;
2446                 }
2447
2448                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2449
2450                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2451                                                   &found_pages);
2452                 if (!wdata) {
2453                         rc = -ENOMEM;
2454                         done = true;
2455                         add_credits_and_wake_if(server, credits, 0);
2456                         break;
2457                 }
2458
2459                 if (found_pages == 0) {
2460                         kref_put(&wdata->refcount, cifs_writedata_release);
2461                         add_credits_and_wake_if(server, credits, 0);
2462                         break;
2463                 }
2464
2465                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2466                                                end, &index, &next, &done);
2467
2468                 /* nothing to write? */
2469                 if (nr_pages == 0) {
2470                         kref_put(&wdata->refcount, cifs_writedata_release);
2471                         add_credits_and_wake_if(server, credits, 0);
2472                         continue;
2473                 }
2474
2475                 wdata->credits = credits_on_stack;
2476                 wdata->cfile = cfile;
2477                 wdata->server = server;
2478                 cfile = NULL;
2479
2480                 if (!wdata->cfile) {
2481                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2482                                  get_file_rc);
2483                         if (is_retryable_error(get_file_rc))
2484                                 rc = get_file_rc;
2485                         else
2486                                 rc = -EBADF;
2487                 } else
2488                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2489
2490                 for (i = 0; i < nr_pages; ++i)
2491                         unlock_page(wdata->pages[i]);
2492
2493                 /* send failure -- clean up the mess */
2494                 if (rc != 0) {
2495                         add_credits_and_wake_if(server, &wdata->credits, 0);
2496                         for (i = 0; i < nr_pages; ++i) {
2497                                 if (is_retryable_error(rc))
2498                                         redirty_page_for_writepage(wbc,
2499                                                            wdata->pages[i]);
2500                                 else
2501                                         SetPageError(wdata->pages[i]);
2502                                 end_page_writeback(wdata->pages[i]);
2503                                 put_page(wdata->pages[i]);
2504                         }
2505                         if (!is_retryable_error(rc))
2506                                 mapping_set_error(mapping, rc);
2507                 }
2508                 kref_put(&wdata->refcount, cifs_writedata_release);
2509
2510                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2511                         index = saved_index;
2512                         continue;
2513                 }
2514
2515                 /* Return immediately if we received a signal during writing */
2516                 if (is_interrupt_error(rc)) {
2517                         done = true;
2518                         break;
2519                 }
2520
2521                 if (rc != 0 && saved_rc == 0)
2522                         saved_rc = rc;
2523
2524                 wbc->nr_to_write -= nr_pages;
2525                 if (wbc->nr_to_write <= 0)
2526                         done = true;
2527
2528                 index = next;
2529         }
2530
2531         if (!scanned && !done) {
2532                 /*
2533                  * We hit the last page and there is more work to be done: wrap
2534                  * back to the start of the file
2535                  */
2536                 scanned = true;
2537                 index = 0;
2538                 goto retry;
2539         }
2540
2541         if (saved_rc != 0)
2542                 rc = saved_rc;
2543
2544         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2545                 mapping->writeback_index = index;
2546
2547         if (cfile)
2548                 cifsFileInfo_put(cfile);
2549         free_xid(xid);
2550         /* Indication to update ctime and mtime as close is deferred */
2551         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2552         return rc;
2553 }
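
/*
 * A worked example of the range setup at the top of cifs_writepages(): with
 * 4096-byte pages (PAGE_SHIFT = 12), wbc->range_start = 8192 and
 * wbc->range_end = 20000 give index = 2 and end = 4, so pages 2..4 are
 * scanned; range_start = 0 with range_end = LLONG_MAX marks range_whole,
 * while the cyclic case instead resumes from mapping->writeback_index.
 */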
2554
2555 static int
2556 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2557 {
2558         int rc;
2559         unsigned int xid;
2560
2561         xid = get_xid();
2562 /* BB add check for wbc flags */
2563         get_page(page);
2564         if (!PageUptodate(page))
2565                 cifs_dbg(FYI, "ppw - page not up to date\n");
2566
2567         /*
2568          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2569          *
2570          * A writepage() implementation always needs to do either this,
2571          * or re-dirty the page with "redirty_page_for_writepage()" in
2572          * the case of a failure.
2573          *
2574          * Just unlocking the page will cause the radix tree tag-bits
2575          * to fail to update with the state of the page correctly.
2576          */
2577         set_page_writeback(page);
2578 retry_write:
2579         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2580         if (is_retryable_error(rc)) {
2581                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2582                         goto retry_write;
2583                 redirty_page_for_writepage(wbc, page);
2584         } else if (rc != 0) {
2585                 SetPageError(page);
2586                 mapping_set_error(page->mapping, rc);
2587         } else {
2588                 SetPageUptodate(page);
2589         }
2590         end_page_writeback(page);
2591         put_page(page);
2592         free_xid(xid);
2593         return rc;
2594 }
2595
2596 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2597 {
2598         int rc = cifs_writepage_locked(page, wbc);
2599         unlock_page(page);
2600         return rc;
2601 }
2602
2603 static int cifs_write_end(struct file *file, struct address_space *mapping,
2604                         loff_t pos, unsigned len, unsigned copied,
2605                         struct page *page, void *fsdata)
2606 {
2607         int rc;
2608         struct inode *inode = mapping->host;
2609         struct cifsFileInfo *cfile = file->private_data;
2610         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2611         __u32 pid;
2612
2613         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2614                 pid = cfile->pid;
2615         else
2616                 pid = current->tgid;
2617
2618         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2619                  page, pos, copied);
2620
2621         if (PageChecked(page)) {
2622                 if (copied == len)
2623                         SetPageUptodate(page);
2624                 ClearPageChecked(page);
2625         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2626                 SetPageUptodate(page);
2627
2628         if (!PageUptodate(page)) {
2629                 char *page_data;
2630                 unsigned offset = pos & (PAGE_SIZE - 1);
2631                 unsigned int xid;
2632
2633                 xid = get_xid();
2634                 /* this is probably better than directly calling
2635                    partialpage_write since the file handle is known in this
2636                    function, which we might as well leverage */
2637                 /* BB check if anything else missing out of ppw
2638                    such as updating last write time */
2639                 page_data = kmap(page);
2640                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2641                 /* if (rc < 0) should we set writebehind rc? */
2642                 kunmap(page);
2643
2644                 free_xid(xid);
2645         } else {
2646                 rc = copied;
2647                 pos += copied;
2648                 set_page_dirty(page);
2649         }
2650
2651         if (rc > 0) {
2652                 spin_lock(&inode->i_lock);
2653                 if (pos > inode->i_size) {
2654                         i_size_write(inode, pos);
2655                         inode->i_blocks = (512 - 1 + pos) >> 9;
2656                 }
2657                 spin_unlock(&inode->i_lock);
2658         }
2659
2660         unlock_page(page);
2661         put_page(page);
2662         /* Indication to update ctime and mtime as close is deferred */
2663         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2664
2665         return rc;
2666 }
2667
2668 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2669                       int datasync)
2670 {
2671         unsigned int xid;
2672         int rc = 0;
2673         struct cifs_tcon *tcon;
2674         struct TCP_Server_Info *server;
2675         struct cifsFileInfo *smbfile = file->private_data;
2676         struct inode *inode = file_inode(file);
2677         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2678
2679         rc = file_write_and_wait_range(file, start, end);
2680         if (rc) {
2681                 trace_cifs_fsync_err(inode->i_ino, rc);
2682                 return rc;
2683         }
2684
2685         xid = get_xid();
2686
2687         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2688                  file, datasync);
2689
2690         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2691                 rc = cifs_zap_mapping(inode);
2692                 if (rc) {
2693                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2694                         rc = 0; /* don't care about it in fsync */
2695                 }
2696         }
2697
2698         tcon = tlink_tcon(smbfile->tlink);
2699         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2700                 server = tcon->ses->server;
2701                 if (server->ops->flush == NULL) {
2702                         rc = -ENOSYS;
2703                         goto strict_fsync_exit;
2704                 }
2705
2706                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2707                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2708                         if (smbfile) {
2709                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2710                                 cifsFileInfo_put(smbfile);
2711                         } else
2712                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2713                 } else
2714                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2715         }
2716
2717 strict_fsync_exit:
2718         free_xid(xid);
2719         return rc;
2720 }
2721
2722 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2723 {
2724         unsigned int xid;
2725         int rc = 0;
2726         struct cifs_tcon *tcon;
2727         struct TCP_Server_Info *server;
2728         struct cifsFileInfo *smbfile = file->private_data;
2729         struct inode *inode = file_inode(file);
2730         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2731
2732         rc = file_write_and_wait_range(file, start, end);
2733         if (rc) {
2734                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2735                 return rc;
2736         }
2737
2738         xid = get_xid();
2739
2740         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2741                  file, datasync);
2742
2743         tcon = tlink_tcon(smbfile->tlink);
2744         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2745                 server = tcon->ses->server;
2746                 if (server->ops->flush == NULL) {
2747                         rc = -ENOSYS;
2748                         goto fsync_exit;
2749                 }
2750
2751                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2752                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2753                         if (smbfile) {
2754                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2755                                 cifsFileInfo_put(smbfile);
2756                         } else
2757                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2758                 } else
2759                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2760         }
2761
2762 fsync_exit:
2763         free_xid(xid);
2764         return rc;
2765 }
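
/*
 * Both fsync variants sit behind the fsync(2)/fdatasync(2) syscalls: dirty
 * pages are written and waited on first, then an SMB Flush is sent on the
 * handle (the strict variant additionally zaps the page cache when no read
 * caching is held). A minimal caller-side sketch (hypothetical mount path,
 * return values unchecked for brevity):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/mnt/cifs/demo", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "data", 4);
 *		fsync(fd);	// data should now be flushed on the server
 *		close(fd);
 *		return 0;
 *	}
 */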
2766
2767 /*
2768  * As the file closes, flush all cached write data for this inode,
2769  * checking for write-behind errors.
2770  */
2771 int cifs_flush(struct file *file, fl_owner_t id)
2772 {
2773         struct inode *inode = file_inode(file);
2774         int rc = 0;
2775
2776         if (file->f_mode & FMODE_WRITE)
2777                 rc = filemap_write_and_wait(inode->i_mapping);
2778
2779         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2780         if (rc)
2781                 trace_cifs_flush_err(inode->i_ino, rc);
2782         return rc;
2783 }
2784
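/*
 * Allocate @num_pages pages for an uncached write. On failure, any
 * pages allocated so far are released and -ENOMEM is returned; on
 * success every slot in @pages holds a fresh page.
 */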
2785 static int
2786 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2787 {
2788         int rc = 0;
2789         unsigned long i;
2790
2791         for (i = 0; i < num_pages; i++) {
2792                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2793                 if (!pages[i]) {
2794                         /*
2795                          * save the number of pages we have already
2796                          * allocated and return with an ENOMEM error
2797                          */
2798                         num_pages = i;
2799                         rc = -ENOMEM;
2800                         break;
2801                 }
2802         }
2803
2804         if (rc) {
2805                 for (i = 0; i < num_pages; i++)
2806                         put_page(pages[i]);
2807         }
2808         return rc;
2809 }
2810
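/*
 * Work out how many pages a write of at most @wsize bytes spans, and
 * optionally report the clamped length through @cur_len. For example,
 * with a 64KB wsize a 100000-byte request is clamped to 65536 bytes,
 * which is 16 pages with 4KB pages.
 */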
2811 static inline
2812 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2813 {
2814         size_t num_pages;
2815         size_t clen;
2816
2817         clen = min_t(const size_t, len, wsize);
2818         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2819
2820         if (cur_len)
2821                 *cur_len = clen;
2822
2823         return num_pages;
2824 }
2825
2826 static void
2827 cifs_uncached_writedata_release(struct kref *refcount)
2828 {
2829         int i;
2830         struct cifs_writedata *wdata = container_of(refcount,
2831                                         struct cifs_writedata, refcount);
2832
2833         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2834         for (i = 0; i < wdata->nr_pages; i++)
2835                 put_page(wdata->pages[i]);
2836         cifs_writedata_release(refcount);
2837 }
2838
2839 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2840
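/*
 * Work-queue completion for an uncached write: advance the server EOF
 * (and i_size, if the write grew the file) under i_lock, wake anyone
 * waiting on the wdata, then collect the results for the AIO context.
 */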
2841 static void
2842 cifs_uncached_writev_complete(struct work_struct *work)
2843 {
2844         struct cifs_writedata *wdata = container_of(work,
2845                                         struct cifs_writedata, work);
2846         struct inode *inode = d_inode(wdata->cfile->dentry);
2847         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2848
2849         spin_lock(&inode->i_lock);
2850         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2851         if (cifsi->server_eof > inode->i_size)
2852                 i_size_write(inode, cifsi->server_eof);
2853         spin_unlock(&inode->i_lock);
2854
2855         complete(&wdata->done);
2856         collect_uncached_write_data(wdata->ctx);
2857         /* the call below may free the last ref to the aio ctx */
2858         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2859 }
2860
2861 static int
2862 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2863                       size_t *len, unsigned long *num_pages)
2864 {
2865         size_t save_len, copied, bytes, cur_len = *len;
2866         unsigned long i, nr_pages = *num_pages;
2867
2868         save_len = cur_len;
2869         for (i = 0; i < nr_pages; i++) {
2870                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2871                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2872                 cur_len -= copied;
2873                 /*
2874                  * If we didn't copy as much as we expected, then that
2875                  * may mean we trod into an unmapped area. Stop copying
2876                  * at that point. On the next pass through the big
2877                  * loop, we'll likely end up getting a zero-length
2878                  * write and bailing out of it.
2879                  */
2880                 if (copied < bytes)
2881                         break;
2882         }
2883         cur_len = save_len - cur_len;
2884         *len = cur_len;
2885
2886         /*
2887          * If we have no data to send, then that probably means that
2888          * the copy above failed altogether. That's most likely because
2889          * the address in the iovec was bogus. Return -EFAULT and let
2890          * the caller free anything we allocated and bail out.
2891          */
2892         if (!cur_len)
2893                 return -EFAULT;
2894
2895         /*
2896          * i + 1 now represents the number of pages we actually used in
2897          * the copy phase above.
2898          */
2899         *num_pages = i + 1;
2900         return 0;
2901 }
2902
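/*
 * Retry path for an uncached write after a reconnect: reopen the file
 * if the handle went stale, wait until the server grants enough credits
 * to cover the whole wdata, and reissue the async write. Our wdata
 * reference is dropped on failure.
 */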
2903 static int
2904 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2905         struct cifs_aio_ctx *ctx)
2906 {
2907         unsigned int wsize;
2908         struct cifs_credits credits;
2909         int rc;
2910         struct TCP_Server_Info *server = wdata->server;
2911
2912         do {
2913                 if (wdata->cfile->invalidHandle) {
2914                         rc = cifs_reopen_file(wdata->cfile, false);
2915                         if (rc == -EAGAIN)
2916                                 continue;
2917                         else if (rc)
2918                                 break;
2919                 }
2920
2922                 /*
2923                  * Wait for credits to resend this wdata.
2924                  * Note: we attempt to resend the whole wdata rather than
2925                  * in segments.
2926                  */
2927                 do {
2928                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2929                                                 &wsize, &credits);
2930                         if (rc)
2931                                 goto fail;
2932
2933                         if (wsize < wdata->bytes) {
2934                                 add_credits_and_wake_if(server, &credits, 0);
2935                                 msleep(1000);
2936                         }
2937                 } while (wsize < wdata->bytes);
2938                 wdata->credits = credits;
2939
2940                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2941
2942                 if (!rc) {
2943                         if (wdata->cfile->invalidHandle)
2944                                 rc = -EAGAIN;
2945                         else {
2946 #ifdef CONFIG_CIFS_SMB_DIRECT
2947                                 if (wdata->mr) {
2948                                         wdata->mr->need_invalidate = true;
2949                                         smbd_deregister_mr(wdata->mr);
2950                                         wdata->mr = NULL;
2951                                 }
2952 #endif
2953                                 rc = server->ops->async_writev(wdata,
2954                                         cifs_uncached_writedata_release);
2955                         }
2956                 }
2957
2958                 /* If the write was successfully sent, we are done */
2959                 if (!rc) {
2960                         list_add_tail(&wdata->list, wdata_list);
2961                         return 0;
2962                 }
2963
2964                 /* Roll back credits and retry if needed */
2965                 add_credits_and_wake_if(server, &wdata->credits, 0);
2966         } while (rc == -EAGAIN);
2967
2968 fail:
2969         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2970         return rc;
2971 }
2972
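/*
 * Split an uncached or direct write into wsize-bounded chunks. For
 * direct I/O the user pages are pinned with iov_iter_get_pages_alloc();
 * otherwise the data is copied into freshly allocated pages. Each chunk
 * is sent as an async write and queued on @wdata_list for collection.
 */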
2973 static int
2974 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2975                      struct cifsFileInfo *open_file,
2976                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2977                      struct cifs_aio_ctx *ctx)
2978 {
2979         int rc = 0;
2980         size_t cur_len;
2981         unsigned long nr_pages, num_pages, i;
2982         struct cifs_writedata *wdata;
2983         struct iov_iter saved_from = *from;
2984         loff_t saved_offset = offset;
2985         pid_t pid;
2986         struct TCP_Server_Info *server;
2987         struct page **pagevec;
2988         size_t start;
2989         unsigned int xid;
2990
2991         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2992                 pid = open_file->pid;
2993         else
2994                 pid = current->tgid;
2995
2996         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2997         xid = get_xid();
2998
2999         do {
3000                 unsigned int wsize;
3001                 struct cifs_credits credits_on_stack;
3002                 struct cifs_credits *credits = &credits_on_stack;
3003
3004                 if (open_file->invalidHandle) {
3005                         rc = cifs_reopen_file(open_file, false);
3006                         if (rc == -EAGAIN)
3007                                 continue;
3008                         else if (rc)
3009                                 break;
3010                 }
3011
3012                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3013                                                    &wsize, credits);
3014                 if (rc)
3015                         break;
3016
3017                 cur_len = min_t(const size_t, len, wsize);
3018
3019                 if (ctx->direct_io) {
3020                         ssize_t result;
3021
3022                         result = iov_iter_get_pages_alloc(
3023                                 from, &pagevec, cur_len, &start);
3024                         if (result < 0) {
3025                                 cifs_dbg(VFS,
3026                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3027                                          result, iov_iter_type(from),
3028                                          from->iov_offset, from->count);
3029                                 dump_stack();
3030
3031                                 rc = result;
3032                                 add_credits_and_wake_if(server, credits, 0);
3033                                 break;
3034                         }
3035                         cur_len = (size_t)result;
3036                         iov_iter_advance(from, cur_len);
3037
3038                         nr_pages =
3039                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3040
3041                         wdata = cifs_writedata_direct_alloc(pagevec,
3042                                              cifs_uncached_writev_complete);
3043                         if (!wdata) {
3044                                 rc = -ENOMEM;
3045                                 add_credits_and_wake_if(server, credits, 0);
3046                                 break;
3047                         }
3048
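                        /*
                         * start is the offset into the first pinned page.
                         * With, say, start = 512 and cur_len = 10000 on 4KB
                         * pages, nr_pages is 3 and the tail page carries
                         * 10000 - (4096 - 512) - 4096 = 2320 bytes.
                         */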
3050                         wdata->page_offset = start;
3051                         wdata->tailsz =
3052                                 nr_pages > 1 ?
3053                                         cur_len - (PAGE_SIZE - start) -
3054                                         (nr_pages - 2) * PAGE_SIZE :
3055                                         cur_len;
3056                 } else {
3057                         nr_pages = get_numpages(wsize, len, &cur_len);
3058                         wdata = cifs_writedata_alloc(nr_pages,
3059                                              cifs_uncached_writev_complete);
3060                         if (!wdata) {
3061                                 rc = -ENOMEM;
3062                                 add_credits_and_wake_if(server, credits, 0);
3063                                 break;
3064                         }
3065
3066                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3067                         if (rc) {
3068                                 kvfree(wdata->pages);
3069                                 kfree(wdata);
3070                                 add_credits_and_wake_if(server, credits, 0);
3071                                 break;
3072                         }
3073
3074                         num_pages = nr_pages;
3075                         rc = wdata_fill_from_iovec(
3076                                 wdata, from, &cur_len, &num_pages);
3077                         if (rc) {
3078                                 for (i = 0; i < nr_pages; i++)
3079                                         put_page(wdata->pages[i]);
3080                                 kvfree(wdata->pages);
3081                                 kfree(wdata);
3082                                 add_credits_and_wake_if(server, credits, 0);
3083                                 break;
3084                         }
3085
3086                         /*
3087                          * Bring nr_pages down to the number of pages we
3088                          * actually used, and free any pages that we didn't use.
3089                          */
3090                         for ( ; nr_pages > num_pages; nr_pages--)
3091                                 put_page(wdata->pages[nr_pages - 1]);
3092
3093                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3094                 }
3095
3096                 wdata->sync_mode = WB_SYNC_ALL;
3097                 wdata->nr_pages = nr_pages;
3098                 wdata->offset = (__u64)offset;
3099                 wdata->cfile = cifsFileInfo_get(open_file);
3100                 wdata->server = server;
3101                 wdata->pid = pid;
3102                 wdata->bytes = cur_len;
3103                 wdata->pagesz = PAGE_SIZE;
3104                 wdata->credits = credits_on_stack;
3105                 wdata->ctx = ctx;
3106                 kref_get(&ctx->refcount);
3107
3108                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3109
3110                 if (!rc) {
3111                         if (wdata->cfile->invalidHandle)
3112                                 rc = -EAGAIN;
3113                         else
3114                                 rc = server->ops->async_writev(wdata,
3115                                         cifs_uncached_writedata_release);
3116                 }
3117
3118                 if (rc) {
3119                         add_credits_and_wake_if(server, &wdata->credits, 0);
3120                         kref_put(&wdata->refcount,
3121                                  cifs_uncached_writedata_release);
3122                         if (rc == -EAGAIN) {
3123                                 *from = saved_from;
3124                                 iov_iter_advance(from, offset - saved_offset);
3125                                 continue;
3126                         }
3127                         break;
3128                 }
3129
3130                 list_add_tail(&wdata->list, wdata_list);
3131                 offset += cur_len;
3132                 len -= cur_len;
3133         } while (len > 0);
3134
3135         free_xid(xid);
3136         return rc;
3137 }
3138
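/*
 * Reap the results of all uncached writes attached to @ctx in order of
 * increasing offset: resend chunks that failed with -EAGAIN, stop
 * waiting on any other error, and report the outcome via ctx->rc.
 */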
3139 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3140 {
3141         struct cifs_writedata *wdata, *tmp;
3142         struct cifs_tcon *tcon;
3143         struct cifs_sb_info *cifs_sb;
3144         struct dentry *dentry = ctx->cfile->dentry;
3145         ssize_t rc;
3146
3147         tcon = tlink_tcon(ctx->cfile->tlink);
3148         cifs_sb = CIFS_SB(dentry->d_sb);
3149
3150         mutex_lock(&ctx->aio_mutex);
3151
3152         if (list_empty(&ctx->list)) {
3153                 mutex_unlock(&ctx->aio_mutex);
3154                 return;
3155         }
3156
3157         rc = ctx->rc;
3158         /*
3159          * Wait for and collect replies for any successful sends in order of
3160          * increasing offset. Once an error is hit, return without waiting
3161          * for any more replies.
3162          */
3163 restart_loop:
3164         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3165                 if (!rc) {
3166                         if (!try_wait_for_completion(&wdata->done)) {
3167                                 mutex_unlock(&ctx->aio_mutex);
3168                                 return;
3169                         }
3170
3171                         if (wdata->result)
3172                                 rc = wdata->result;
3173                         else
3174                                 ctx->total_len += wdata->bytes;
3175
3176                         /* resend call if it's a retryable error */
3177                         if (rc == -EAGAIN) {
3178                                 struct list_head tmp_list;
3179                                 struct iov_iter tmp_from = ctx->iter;
3180
3181                                 INIT_LIST_HEAD(&tmp_list);
3182                                 list_del_init(&wdata->list);
3183
3184                                 if (ctx->direct_io)
3185                                         rc = cifs_resend_wdata(
3186                                                 wdata, &tmp_list, ctx);
3187                                 else {
3188                                         iov_iter_advance(&tmp_from,
3189                                                  wdata->offset - ctx->pos);
3190
3191                                         rc = cifs_write_from_iter(wdata->offset,
3192                                                 wdata->bytes, &tmp_from,
3193                                                 ctx->cfile, cifs_sb, &tmp_list,
3194                                                 ctx);
3195
3196                                         kref_put(&wdata->refcount,
3197                                                 cifs_uncached_writedata_release);
3198                                 }
3199
3200                                 list_splice(&tmp_list, &ctx->list);
3201                                 goto restart_loop;
3202                         }
3203                 }
3204                 list_del_init(&wdata->list);
3205                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3206         }
3207
3208         cifs_stats_bytes_written(tcon, ctx->total_len);
3209         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3210
3211         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3212
3213         mutex_unlock(&ctx->aio_mutex);
3214
3215         if (ctx->iocb && ctx->iocb->ki_complete)
3216                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3217         else
3218                 complete(&ctx->done);
3219 }
3220
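/*
 * Common body of cifs_user_writev() and cifs_direct_writev(): set up a
 * cifs_aio_ctx, issue the chunked async writes, then either return
 * -EIOCBQUEUED for an async kiocb or wait for completion and return the
 * number of bytes written.
 */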
3221 static ssize_t __cifs_writev(
3222         struct kiocb *iocb, struct iov_iter *from, bool direct)
3223 {
3224         struct file *file = iocb->ki_filp;
3225         ssize_t total_written = 0;
3226         struct cifsFileInfo *cfile;
3227         struct cifs_tcon *tcon;
3228         struct cifs_sb_info *cifs_sb;
3229         struct cifs_aio_ctx *ctx;
3230         struct iov_iter saved_from = *from;
3231         size_t len = iov_iter_count(from);
3232         int rc;
3233
3234         /*
3235          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC.
3236          * In this case, fall back to the non-direct write path.
3237          * This could be improved by getting pages directly in ITER_KVEC.
3238          */
3239         if (direct && iov_iter_is_kvec(from)) {
3240                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3241                 direct = false;
3242         }
3243
3244         rc = generic_write_checks(iocb, from);
3245         if (rc <= 0)
3246                 return rc;
3247
3248         cifs_sb = CIFS_FILE_SB(file);
3249         cfile = file->private_data;
3250         tcon = tlink_tcon(cfile->tlink);
3251
3252         if (!tcon->ses->server->ops->async_writev)
3253                 return -ENOSYS;
3254
3255         ctx = cifs_aio_ctx_alloc();
3256         if (!ctx)
3257                 return -ENOMEM;
3258
3259         ctx->cfile = cifsFileInfo_get(cfile);
3260
3261         if (!is_sync_kiocb(iocb))
3262                 ctx->iocb = iocb;
3263
3264         ctx->pos = iocb->ki_pos;
3265
3266         if (direct) {
3267                 ctx->direct_io = true;
3268                 ctx->iter = *from;
3269                 ctx->len = len;
3270         } else {
3271                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3272                 if (rc) {
3273                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3274                         return rc;
3275                 }
3276         }
3277
3278         /* grab a lock here because write response handlers can access ctx */
3279         mutex_lock(&ctx->aio_mutex);
3280
3281         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3282                                   cfile, cifs_sb, &ctx->list, ctx);
3283
3284         /*
3285          * If at least one write was successfully sent, then discard any rc
3286          * value from the later writes. If the remaining writes succeed, we
3287          * end up returning whatever was written. If they fail, we get a new
3288          * rc value from the collection phase.
3289          */
3290         if (!list_empty(&ctx->list))
3291                 rc = 0;
3292
3293         mutex_unlock(&ctx->aio_mutex);
3294
3295         if (rc) {
3296                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3297                 return rc;
3298         }
3299
3300         if (!is_sync_kiocb(iocb)) {
3301                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3302                 return -EIOCBQUEUED;
3303         }
3304
3305         rc = wait_for_completion_killable(&ctx->done);
3306         if (rc) {
3307                 mutex_lock(&ctx->aio_mutex);
3308                 ctx->rc = rc = -EINTR;
3309                 total_written = ctx->total_len;
3310                 mutex_unlock(&ctx->aio_mutex);
3311         } else {
3312                 rc = ctx->rc;
3313                 total_written = ctx->total_len;
3314         }
3315
3316         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3317
3318         if (unlikely(!total_written))
3319                 return rc;
3320
3321         iocb->ki_pos += total_written;
3322         return total_written;
3323 }
3324
3325 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3326 {
3327         return __cifs_writev(iocb, from, true);
3328 }
3329
3330 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3331 {
3332         return __cifs_writev(iocb, from, false);
3333 }
3334
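/*
 * Write through the page cache while checking for conflicting mandatory
 * byte-range locks: take lock_sem shared and fail with -EACCES if the
 * range is covered by a lock that prevents writing.
 */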
3335 static ssize_t
3336 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3337 {
3338         struct file *file = iocb->ki_filp;
3339         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3340         struct inode *inode = file->f_mapping->host;
3341         struct cifsInodeInfo *cinode = CIFS_I(inode);
3342         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3343         ssize_t rc;
3344
3345         inode_lock(inode);
3346         /*
3347          * We need to hold the sem to be sure nobody modifies the lock list
3348          * with a brlock that prevents writing.
3349          */
3350         down_read(&cinode->lock_sem);
3351
3352         rc = generic_write_checks(iocb, from);
3353         if (rc <= 0)
3354                 goto out;
3355
3356         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3357                                      server->vals->exclusive_lock_type, 0,
3358                                      NULL, CIFS_WRITE_OP))
3359                 rc = __generic_file_write_iter(iocb, from);
3360         else
3361                 rc = -EACCES;
3362 out:
3363         up_read(&cinode->lock_sem);
3364         inode_unlock(inode);
3365
3366         if (rc > 0)
3367                 rc = generic_write_sync(iocb, rc);
3368         return rc;
3369 }
3370
3371 ssize_t
3372 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3373 {
3374         struct inode *inode = file_inode(iocb->ki_filp);
3375         struct cifsInodeInfo *cinode = CIFS_I(inode);
3376         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3377         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3378                                                 iocb->ki_filp->private_data;
3379         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3380         ssize_t written;
3381
3382         written = cifs_get_writer(cinode);
3383         if (written)
3384                 return written;
3385
3386         if (CIFS_CACHE_WRITE(cinode)) {
3387                 if (cap_unix(tcon->ses) &&
3388                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3389                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3390                         written = generic_file_write_iter(iocb, from);
3391                         goto out;
3392                 }
3393                 written = cifs_writev(iocb, from);
3394                 goto out;
3395         }
3396         /*
3397          * For non-oplocked files in strict cache mode we need to write the data
3398          * to the server exactly from pos to pos+len-1 rather than flush all
3399          * affected pages, because flushing may cause an error with mandatory
3400          * locks on these pages but not on the region from pos to pos+len-1.
3401          */
3402         written = cifs_user_writev(iocb, from);
3403         if (CIFS_CACHE_READ(cinode)) {
3404                 /*
3405                  * We have read level caching and we have just sent a write
3406                  * request to the server thus making data in the cache stale.
3407                  * Zap the cache and set oplock/lease level to NONE to avoid
3408                  * reading stale data from the cache. All subsequent read
3409                  * operations will read new data from the server.
3410                  */
3411                 cifs_zap_mapping(inode);
3412                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3413                          inode);
3414                 cinode->oplock = 0;
3415         }
3416 out:
3417         cifs_put_writer(cinode);
3418         return written;
3419 }
3420
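/*
 * Allocate a cifs_readdata around a caller-supplied page array, as used
 * by the direct I/O path where the pages come from the user's buffer.
 */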
3421 static struct cifs_readdata *
3422 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3423 {
3424         struct cifs_readdata *rdata;
3425
3426         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3427         if (rdata != NULL) {
3428                 rdata->pages = pages;
3429                 kref_init(&rdata->refcount);
3430                 INIT_LIST_HEAD(&rdata->list);
3431                 init_completion(&rdata->done);
3432                 INIT_WORK(&rdata->work, complete);
3433         }
3434
3435         return rdata;
3436 }
3437
3438 static struct cifs_readdata *
3439 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3440 {
3441         struct page **pages =
3442                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3443         struct cifs_readdata *ret = NULL;
3444
3445         if (pages) {
3446                 ret = cifs_readdata_direct_alloc(pages, complete);
3447                 if (!ret)
3448                         kfree(pages);
3449         }
3450
3451         return ret;
3452 }
3453
3454 void
3455 cifs_readdata_release(struct kref *refcount)
3456 {
3457         struct cifs_readdata *rdata = container_of(refcount,
3458                                         struct cifs_readdata, refcount);
3459 #ifdef CONFIG_CIFS_SMB_DIRECT
3460         if (rdata->mr) {
3461                 smbd_deregister_mr(rdata->mr);
3462                 rdata->mr = NULL;
3463         }
3464 #endif
3465         if (rdata->cfile)
3466                 cifsFileInfo_put(rdata->cfile);
3467
3468         kvfree(rdata->pages);
3469         kfree(rdata);
3470 }
3471
3472 static int
3473 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3474 {
3475         int rc = 0;
3476         struct page *page;
3477         unsigned int i;
3478
3479         for (i = 0; i < nr_pages; i++) {
3480                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3481                 if (!page) {
3482                         rc = -ENOMEM;
3483                         break;
3484                 }
3485                 rdata->pages[i] = page;
3486         }
3487
3488         if (rc) {
3489                 unsigned int nr_page_failed = i;
3490
3491                 for (i = 0; i < nr_page_failed; i++) {
3492                         put_page(rdata->pages[i]);
3493                         rdata->pages[i] = NULL;
3494                 }
3495         }
3496         return rc;
3497 }
3498
3499 static void
3500 cifs_uncached_readdata_release(struct kref *refcount)
3501 {
3502         struct cifs_readdata *rdata = container_of(refcount,
3503                                         struct cifs_readdata, refcount);
3504         unsigned int i;
3505
3506         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3507         for (i = 0; i < rdata->nr_pages; i++) {
3508                 put_page(rdata->pages[i]);
3509         }
3510         cifs_readdata_release(refcount);
3511 }
3512
3513 /**
3514  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3515  * @rdata:      the readdata response with list of pages holding data
3516  * @iter:       destination for our data
3517  *
3518  * This function copies data from a list of pages in a readdata response into
3519  * an array of iovecs. It will first calculate where the data should go
3520  * based on the info in the readdata and then copy the data into that spot.
3521  */
3522 static int
3523 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3524 {
3525         size_t remaining = rdata->got_bytes;
3526         unsigned int i;
3527
3528         for (i = 0; i < rdata->nr_pages; i++) {
3529                 struct page *page = rdata->pages[i];
3530                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3531                 size_t written;
3532
3533                 if (unlikely(iov_iter_is_pipe(iter))) {
3534                         void *addr = kmap_atomic(page);
3535
3536                         written = copy_to_iter(addr, copy, iter);
3537                         kunmap_atomic(addr);
3538                 } else
3539                         written = copy_page_to_iter(page, 0, copy, iter);
3540                 remaining -= written;
3541                 if (written < copy && iov_iter_count(iter) > 0)
3542                         break;
3543         }
3544         return remaining ? -EFAULT : 0;
3545 }
3546
3547 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3548
3549 static void
3550 cifs_uncached_readv_complete(struct work_struct *work)
3551 {
3552         struct cifs_readdata *rdata = container_of(work,
3553                                                 struct cifs_readdata, work);
3554
3555         complete(&rdata->done);
3556         collect_uncached_read_data(rdata->ctx);
3557         /* the call below may free the last ref to the aio ctx */
3558         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3559 }
3560
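/*
 * Fill the pages of @rdata with up to @len received bytes, either
 * copied from @iter (when the caller already holds the payload) or read
 * straight from the server socket. Pages beyond the received length are
 * released, and a short tail page's length is recorded in tailsz.
 */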
3561 static int
3562 uncached_fill_pages(struct TCP_Server_Info *server,
3563                     struct cifs_readdata *rdata, struct iov_iter *iter,
3564                     unsigned int len)
3565 {
3566         int result = 0;
3567         unsigned int i;
3568         unsigned int nr_pages = rdata->nr_pages;
3569         unsigned int page_offset = rdata->page_offset;
3570
3571         rdata->got_bytes = 0;
3572         rdata->tailsz = PAGE_SIZE;
3573         for (i = 0; i < nr_pages; i++) {
3574                 struct page *page = rdata->pages[i];
3575                 size_t n;
3576                 unsigned int segment_size = rdata->pagesz;
3577
3578                 if (i == 0)
3579                         segment_size -= page_offset;
3580                 else
3581                         page_offset = 0;
3582
3584                 if (len <= 0) {
3585                         /* no need to hold page hostage */
3586                         rdata->pages[i] = NULL;
3587                         rdata->nr_pages--;
3588                         put_page(page);
3589                         continue;
3590                 }
3591
3592                 n = len;
3593                 if (len >= segment_size)
3594                         /* enough data to fill the page */
3595                         n = segment_size;
3596                 else
3597                         rdata->tailsz = len;
3598                 len -= n;
3599
3600                 if (iter)
3601                         result = copy_page_from_iter(
3602                                         page, page_offset, n, iter);
3603 #ifdef CONFIG_CIFS_SMB_DIRECT
3604                 else if (rdata->mr)
3605                         result = n;
3606 #endif
3607                 else
3608                         result = cifs_read_page_from_socket(
3609                                         server, page, page_offset, n);
3610                 if (result < 0)
3611                         break;
3612
3613                 rdata->got_bytes += result;
3614         }
3615
3616         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3617                                                 rdata->got_bytes : result;
3618 }
3619
3620 static int
3621 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3622                               struct cifs_readdata *rdata, unsigned int len)
3623 {
3624         return uncached_fill_pages(server, rdata, NULL, len);
3625 }
3626
3627 static int
3628 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3629                               struct cifs_readdata *rdata,
3630                               struct iov_iter *iter)
3631 {
3632         return uncached_fill_pages(server, rdata, iter, iter->count);
3633 }
3634
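/*
 * Read-side counterpart of cifs_resend_wdata(): reopen a stale handle,
 * wait for enough credits to cover the whole rdata, and reissue the
 * async read, dropping our rdata reference on failure.
 */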
3635 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3636                         struct list_head *rdata_list,
3637                         struct cifs_aio_ctx *ctx)
3638 {
3639         unsigned int rsize;
3640         struct cifs_credits credits;
3641         int rc;
3642         struct TCP_Server_Info *server;
3643
3644         /* XXX: should we pick a new channel here? */
3645         server = rdata->server;
3646
3647         do {
3648                 if (rdata->cfile->invalidHandle) {
3649                         rc = cifs_reopen_file(rdata->cfile, true);
3650                         if (rc == -EAGAIN)
3651                                 continue;
3652                         else if (rc)
3653                                 break;
3654                 }
3655
3656                 /*
3657                  * Wait for credits to resend this rdata.
3658                  * Note: we attempt to resend the whole rdata rather than
3659                  * in segments.
3660                  */
3661                 do {
3662                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3663                                                 &rsize, &credits);
3664
3665                         if (rc)
3666                                 goto fail;
3667
3668                         if (rsize < rdata->bytes) {
3669                                 add_credits_and_wake_if(server, &credits, 0);
3670                                 msleep(1000);
3671                         }
3672                 } while (rsize < rdata->bytes);
3673                 rdata->credits = credits;
3674
3675                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3676                 if (!rc) {
3677                         if (rdata->cfile->invalidHandle)
3678                                 rc = -EAGAIN;
3679                         else {
3680 #ifdef CONFIG_CIFS_SMB_DIRECT
3681                                 if (rdata->mr) {
3682                                         rdata->mr->need_invalidate = true;
3683                                         smbd_deregister_mr(rdata->mr);
3684                                         rdata->mr = NULL;
3685                                 }
3686 #endif
3687                                 rc = server->ops->async_readv(rdata);
3688                         }
3689                 }
3690
3691                 /* If the read was successfully sent, we are done */
3692                 if (!rc) {
3693                         /* Add to aio pending list */
3694                         list_add_tail(&rdata->list, rdata_list);
3695                         return 0;
3696                 }
3697
3698                 /* Roll back credits and retry if needed */
3699                 add_credits_and_wake_if(server, &rdata->credits, 0);
3700         } while (rc == -EAGAIN);
3701
3702 fail:
3703         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3704         return rc;
3705 }
3706
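/*
 * Split an uncached or direct read into rsize-bounded chunks and issue
 * each as an async read, queueing the rdata structures on @rdata_list
 * for collect_uncached_read_data() to reap.
 */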
3707 static int
3708 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3709                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3710                      struct cifs_aio_ctx *ctx)
3711 {
3712         struct cifs_readdata *rdata;
3713         unsigned int npages, rsize;
3714         struct cifs_credits credits_on_stack;
3715         struct cifs_credits *credits = &credits_on_stack;
3716         size_t cur_len;
3717         int rc;
3718         pid_t pid;
3719         struct TCP_Server_Info *server;
3720         struct page **pagevec;
3721         size_t start;
3722         struct iov_iter direct_iov = ctx->iter;
3723
3724         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3725
3726         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3727                 pid = open_file->pid;
3728         else
3729                 pid = current->tgid;
3730
3731         if (ctx->direct_io)
3732                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3733
3734         do {
3735                 if (open_file->invalidHandle) {
3736                         rc = cifs_reopen_file(open_file, true);
3737                         if (rc == -EAGAIN)
3738                                 continue;
3739                         else if (rc)
3740                                 break;
3741                 }
3742
3743                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3744                                                    &rsize, credits);
3745                 if (rc)
3746                         break;
3747
3748                 cur_len = min_t(const size_t, len, rsize);
3749
3750                 if (ctx->direct_io) {
3751                         ssize_t result;
3752
3753                         result = iov_iter_get_pages_alloc(
3754                                         &direct_iov, &pagevec,
3755                                         cur_len, &start);
3756                         if (result < 0) {
3757                                 cifs_dbg(VFS,
3758                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3759                                          result, iov_iter_type(&direct_iov),
3760                                          direct_iov.iov_offset,
3761                                          direct_iov.count);
3762                                 dump_stack();
3763
3764                                 rc = result;
3765                                 add_credits_and_wake_if(server, credits, 0);
3766                                 break;
3767                         }
3768                         cur_len = (size_t)result;
3769                         iov_iter_advance(&direct_iov, cur_len);
3770
3771                         rdata = cifs_readdata_direct_alloc(
3772                                         pagevec, cifs_uncached_readv_complete);
3773                         if (!rdata) {
3774                                 add_credits_and_wake_if(server, credits, 0);
3775                                 rc = -ENOMEM;
3776                                 break;
3777                         }
3778
3779                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3780                         rdata->page_offset = start;
3781                         rdata->tailsz = npages > 1 ?
3782                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3783                                 cur_len;
3784
3785                 } else {
3787                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3788                         /* allocate a readdata struct */
3789                         rdata = cifs_readdata_alloc(npages,
3790                                             cifs_uncached_readv_complete);
3791                         if (!rdata) {
3792                                 add_credits_and_wake_if(server, credits, 0);
3793                                 rc = -ENOMEM;
3794                                 break;
3795                         }
3796
3797                         rc = cifs_read_allocate_pages(rdata, npages);
3798                         if (rc) {
3799                                 kvfree(rdata->pages);
3800                                 kfree(rdata);
3801                                 add_credits_and_wake_if(server, credits, 0);
3802                                 break;
3803                         }
3804
3805                         rdata->tailsz = PAGE_SIZE;
3806                 }
3807
3808                 rdata->server = server;
3809                 rdata->cfile = cifsFileInfo_get(open_file);
3810                 rdata->nr_pages = npages;
3811                 rdata->offset = offset;
3812                 rdata->bytes = cur_len;
3813                 rdata->pid = pid;
3814                 rdata->pagesz = PAGE_SIZE;
3815                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3816                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3817                 rdata->credits = credits_on_stack;
3818                 rdata->ctx = ctx;
3819                 kref_get(&ctx->refcount);
3820
3821                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3822
3823                 if (!rc) {
3824                         if (rdata->cfile->invalidHandle)
3825                                 rc = -EAGAIN;
3826                         else
3827                                 rc = server->ops->async_readv(rdata);
3828                 }
3829
3830                 if (rc) {
3831                         add_credits_and_wake_if(server, &rdata->credits, 0);
3832                         kref_put(&rdata->refcount,
3833                                 cifs_uncached_readdata_release);
3834                         if (rc == -EAGAIN) {
3835                                 iov_iter_revert(&direct_iov, cur_len);
3836                                 continue;
3837                         }
3838                         break;
3839                 }
3840
3841                 list_add_tail(&rdata->list, rdata_list);
3842                 offset += cur_len;
3843                 len -= cur_len;
3844         } while (len > 0);
3845
3846         return rc;
3847 }
3848
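/*
 * Reap the results of all uncached reads attached to @ctx in offset
 * order: copy data to the user iterator in the non-direct case, resend
 * chunks that failed with -EAGAIN, and complete the iocb when done.
 */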
3849 static void
3850 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3851 {
3852         struct cifs_readdata *rdata, *tmp;
3853         struct iov_iter *to = &ctx->iter;
3854         struct cifs_sb_info *cifs_sb;
3855         int rc;
3856
3857         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3858
3859         mutex_lock(&ctx->aio_mutex);
3860
3861         if (list_empty(&ctx->list)) {
3862                 mutex_unlock(&ctx->aio_mutex);
3863                 return;
3864         }
3865
3866         rc = ctx->rc;
3867         /* the loop below should proceed in the order of increasing offsets */
3868 again:
3869         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3870                 if (!rc) {
3871                         if (!try_wait_for_completion(&rdata->done)) {
3872                                 mutex_unlock(&ctx->aio_mutex);
3873                                 return;
3874                         }
3875
3876                         if (rdata->result == -EAGAIN) {
3877                                 /* resend call if it's a retryable error */
3878                                 struct list_head tmp_list;
3879                                 unsigned int got_bytes = rdata->got_bytes;
3880
3881                                 list_del_init(&rdata->list);
3882                                 INIT_LIST_HEAD(&tmp_list);
3883
3884                                 /*
3885                                  * We got part of the data and then a
3886                                  * reconnect happened -- fill the buffer
3887                                  * and continue reading.
3888                                  */
3889                                 if (got_bytes && got_bytes < rdata->bytes) {
3890                                         rc = 0;
3891                                         if (!ctx->direct_io)
3892                                                 rc = cifs_readdata_to_iov(rdata, to);
3893                                         if (rc) {
3894                                                 kref_put(&rdata->refcount,
3895                                                         cifs_uncached_readdata_release);
3896                                                 continue;
3897                                         }
3898                                 }
3899
3900                                 if (ctx->direct_io) {
3901                                         /*
3902                                          * Re-use rdata as this is a
3903                                          * direct I/O request
3904                                          */
3905                                         rc = cifs_resend_rdata(
3906                                                 rdata,
3907                                                 &tmp_list, ctx);
3908                                 } else {
3909                                         rc = cifs_send_async_read(
3910                                                 rdata->offset + got_bytes,
3911                                                 rdata->bytes - got_bytes,
3912                                                 rdata->cfile, cifs_sb,
3913                                                 &tmp_list, ctx);
3914
3915                                         kref_put(&rdata->refcount,
3916                                                 cifs_uncached_readdata_release);
3917                                 }
3918
3919                                 list_splice(&tmp_list, &ctx->list);
3920
3921                                 goto again;
3922                         } else if (rdata->result)
3923                                 rc = rdata->result;
3924                         else if (!ctx->direct_io)
3925                                 rc = cifs_readdata_to_iov(rdata, to);
3926
3927                         /* if there was a short read -- discard anything left */
3928                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3929                                 rc = -ENODATA;
3930
3931                         ctx->total_len += rdata->got_bytes;
3932                 }
3933                 list_del_init(&rdata->list);
3934                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3935         }
3936
3937         if (!ctx->direct_io)
3938                 ctx->total_len = ctx->len - iov_iter_count(to);
3939
3940         /* mask the -ENODATA case */
3941         if (rc == -ENODATA)
3942                 rc = 0;
3943
3944         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3945
3946         mutex_unlock(&ctx->aio_mutex);
3947
3948         if (ctx->iocb && ctx->iocb->ki_complete)
3949                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3950         else
3951                 complete(&ctx->done);
3952 }
3953
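/*
 * Common body of cifs_user_readv() and cifs_direct_readv(): build a
 * cifs_aio_ctx, send the chunked async reads, then either return
 * -EIOCBQUEUED or wait for completion and return the bytes read.
 */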
3954 static ssize_t __cifs_readv(
3955         struct kiocb *iocb, struct iov_iter *to, bool direct)
3956 {
3957         size_t len;
3958         struct file *file = iocb->ki_filp;
3959         struct cifs_sb_info *cifs_sb;
3960         struct cifsFileInfo *cfile;
3961         struct cifs_tcon *tcon;
3962         ssize_t rc, total_read = 0;
3963         loff_t offset = iocb->ki_pos;
3964         struct cifs_aio_ctx *ctx;
3965
3966         /*
3967          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so
3968          * fall back to the data-copy read path.
3969          * This could be improved by getting pages directly in ITER_KVEC.
3970          */
3971         if (direct && iov_iter_is_kvec(to)) {
3972                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3973                 direct = false;
3974         }
3975
3976         len = iov_iter_count(to);
3977         if (!len)
3978                 return 0;
3979
3980         cifs_sb = CIFS_FILE_SB(file);
3981         cfile = file->private_data;
3982         tcon = tlink_tcon(cfile->tlink);
3983
3984         if (!tcon->ses->server->ops->async_readv)
3985                 return -ENOSYS;
3986
3987         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3988                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3989
3990         ctx = cifs_aio_ctx_alloc();
3991         if (!ctx)
3992                 return -ENOMEM;
3993
3994         ctx->cfile = cifsFileInfo_get(cfile);
3995
3996         if (!is_sync_kiocb(iocb))
3997                 ctx->iocb = iocb;
3998
3999         if (iter_is_iovec(to))
4000                 ctx->should_dirty = true;
4001
4002         if (direct) {
4003                 ctx->pos = offset;
4004                 ctx->direct_io = true;
4005                 ctx->iter = *to;
4006                 ctx->len = len;
4007         } else {
4008                 rc = setup_aio_ctx_iter(ctx, to, READ);
4009                 if (rc) {
4010                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4011                         return rc;
4012                 }
4013                 len = ctx->len;
4014         }
4015
4016         /* grab a lock here because read response handlers can access ctx */
4017         mutex_lock(&ctx->aio_mutex);
4018
4019         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4020
4021         /* if at least one read request was successfully sent, reset rc */
4022         if (!list_empty(&ctx->list))
4023                 rc = 0;
4024
4025         mutex_unlock(&ctx->aio_mutex);
4026
4027         if (rc) {
4028                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4029                 return rc;
4030         }
4031
4032         if (!is_sync_kiocb(iocb)) {
4033                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4034                 return -EIOCBQUEUED;
4035         }
4036
4037         rc = wait_for_completion_killable(&ctx->done);
4038         if (rc) {
4039                 mutex_lock(&ctx->aio_mutex);
4040                 ctx->rc = rc = -EINTR;
4041                 total_read = ctx->total_len;
4042                 mutex_unlock(&ctx->aio_mutex);
4043         } else {
4044                 rc = ctx->rc;
4045                 total_read = ctx->total_len;
4046         }
4047
4048         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4049
4050         if (total_read) {
4051                 iocb->ki_pos += total_read;
4052                 return total_read;
4053         }
4054         return rc;
4055 }
4056
4057 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4058 {
4059         return __cifs_readv(iocb, to, true);
4060 }
4061
4062 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4063 {
4064         return __cifs_readv(iocb, to, false);
4065 }
4066
4067 ssize_t
4068 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4069 {
4070         struct inode *inode = file_inode(iocb->ki_filp);
4071         struct cifsInodeInfo *cinode = CIFS_I(inode);
4072         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4073         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4074                                                 iocb->ki_filp->private_data;
4075         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4076         int rc = -EACCES;
4077
4078         /*
4079          * In strict cache mode we need to read from the server all the time
4080          * if we don't have a level II oplock, because the server can delay
4081          * mtime changes - so we can't decide whether to invalidate the inode.
4082          * We can also fail page reading if there are mandatory locks on
4083          * pages affected by this read but not on the region from pos to
4084          * pos+len-1.
4085          */
4086         if (!CIFS_CACHE_READ(cinode))
4087                 return cifs_user_readv(iocb, to);
4088
4089         if (cap_unix(tcon->ses) &&
4090             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4091             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4092                 return generic_file_read_iter(iocb, to);
4093
4094         /*
4095          * We need to hold the sem to be sure nobody modifies lock list
4096          * with a brlock that prevents reading.
4097          */
4098         down_read(&cinode->lock_sem);
4099         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4100                                      tcon->ses->server->vals->shared_lock_type,
4101                                      0, NULL, CIFS_READ_OP))
4102                 rc = generic_file_read_iter(iocb, to);
4103         up_read(&cinode->lock_sem);
4104         return rc;
4105 }
4106
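/*
 * Legacy synchronous read path: issue SMB reads of at most rsize bytes
 * in a loop, retrying a chunk on -EAGAIN (e.g. after a reconnect), and
 * advance *offset as data arrives.
 */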
4107 static ssize_t
4108 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4109 {
4110         int rc = -EACCES;
4111         unsigned int bytes_read = 0;
4112         unsigned int total_read;
4113         unsigned int current_read_size;
4114         unsigned int rsize;
4115         struct cifs_sb_info *cifs_sb;
4116         struct cifs_tcon *tcon;
4117         struct TCP_Server_Info *server;
4118         unsigned int xid;
4119         char *cur_offset;
4120         struct cifsFileInfo *open_file;
4121         struct cifs_io_parms io_parms = {0};
4122         int buf_type = CIFS_NO_BUFFER;
4123         __u32 pid;
4124
4125         xid = get_xid();
4126         cifs_sb = CIFS_FILE_SB(file);
4127
4128         /* FIXME: set up handlers for larger reads and/or convert to async */
4129         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4130
4131         if (file->private_data == NULL) {
4132                 rc = -EBADF;
4133                 free_xid(xid);
4134                 return rc;
4135         }
4136         open_file = file->private_data;
4137         tcon = tlink_tcon(open_file->tlink);
4138         server = cifs_pick_channel(tcon->ses);
4139
4140         if (!server->ops->sync_read) {
4141                 free_xid(xid);
4142                 return -ENOSYS;
4143         }
4144
4145         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4146                 pid = open_file->pid;
4147         else
4148                 pid = current->tgid;
4149
4150         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4151                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4152
4153         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4154              total_read += bytes_read, cur_offset += bytes_read) {
4155                 do {
4156                         current_read_size = min_t(uint, read_size - total_read,
4157                                                   rsize);
4158                         /*
4159                          * For Windows ME and 9x we do not want to request
4160                          * more than was negotiated, since the server will
4161                          * refuse the read otherwise.
4162                          */
4163                         if (!(tcon->ses->capabilities &
4164                                 tcon->ses->server->vals->cap_large_files)) {
4165                                 current_read_size = min_t(uint,
4166                                         current_read_size, CIFSMaxBufSize);
4167                         }
4168                         if (open_file->invalidHandle) {
4169                                 rc = cifs_reopen_file(open_file, true);
4170                                 if (rc != 0)
4171                                         break;
4172                         }
4173                         io_parms.pid = pid;
4174                         io_parms.tcon = tcon;
4175                         io_parms.offset = *offset;
4176                         io_parms.length = current_read_size;
4177                         io_parms.server = server;
4178                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4179                                                     &bytes_read, &cur_offset,
4180                                                     &buf_type);
4181                 } while (rc == -EAGAIN);
4182
4183                 if (rc || (bytes_read == 0)) {
4184                         if (total_read) {
4185                                 break;
4186                         } else {
4187                                 free_xid(xid);
4188                                 return rc;
4189                         }
4190                 } else {
4191                         cifs_stats_bytes_read(tcon, total_read);
4192                         *offset += bytes_read;
4193                 }
4194         }
4195         free_xid(xid);
4196         return total_read;
4197 }
4198
4199 /*
4200  * If the page is mmap'ed into a process' page tables, then we need to make
4201  * sure that it doesn't change while being written back.
4202  */
4203 static vm_fault_t
4204 cifs_page_mkwrite(struct vm_fault *vmf)
4205 {
4206         struct page *page = vmf->page;
4207
4208 #ifdef CONFIG_CIFS_FSCACHE
4209         if (PageFsCache(page) &&
4210             wait_on_page_fscache_killable(page) < 0)
4211                 return VM_FAULT_RETRY;
4212 #endif
4213
4214         lock_page(page);
4215         return VM_FAULT_LOCKED;
4216 }
4217
4218 static const struct vm_operations_struct cifs_file_vm_ops = {
4219         .fault = filemap_fault,
4220         .map_pages = filemap_map_pages,
4221         .page_mkwrite = cifs_page_mkwrite,
4222 };
4223
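/*
 * Strict-cache variant of mmap: if we do not hold a read oplock/lease
 * (CIFS_CACHE_READ), zap the page cache first so that faults fetch fresh
 * data from the server before the mapping is established.
 */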
4224 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4225 {
4226         int xid, rc = 0;
4227         struct inode *inode = file_inode(file);
4228
4229         xid = get_xid();
4230
4231         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4232                 rc = cifs_zap_mapping(inode);
4233         if (!rc)
4234                 rc = generic_file_mmap(file, vma);
4235         if (!rc)
4236                 vma->vm_ops = &cifs_file_vm_ops;
4237
4238         free_xid(xid);
4239         return rc;
4240 }
4241
4242 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4243 {
4244         int rc, xid;
4245
4246         xid = get_xid();
4247
4248         rc = cifs_revalidate_file(file);
4249         if (rc)
4250                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4251                          rc);
4252         if (!rc)
4253                 rc = generic_file_mmap(file, vma);
4254         if (!rc)
4255                 vma->vm_ops = &cifs_file_vm_ops;
4256
4257         free_xid(xid);
4258         return rc;
4259 }
4260
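/*
 * Completion work for async reads issued by cifs_readpages(). Adds each
 * page to the LRU, marks it uptodate (flushing the dcache) on success or
 * on a partially successful -EAGAIN, and marks it in error otherwise;
 * successful pages are also pushed to fscache. Drops the page and rdata
 * references taken when the read was submitted.
 */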
4261 static void
4262 cifs_readv_complete(struct work_struct *work)
4263 {
4264         unsigned int i, got_bytes;
4265         struct cifs_readdata *rdata = container_of(work,
4266                                                 struct cifs_readdata, work);
4267
4268         got_bytes = rdata->got_bytes;
4269         for (i = 0; i < rdata->nr_pages; i++) {
4270                 struct page *page = rdata->pages[i];
4271
4272                 lru_cache_add(page);
4273
4274                 if (rdata->result == 0 ||
4275                     (rdata->result == -EAGAIN && got_bytes)) {
4276                         flush_dcache_page(page);
4277                         SetPageUptodate(page);
4278                 } else
4279                         SetPageError(page);
4280
4281                 unlock_page(page);
4282
4283                 if (rdata->result == 0 ||
4284                     (rdata->result == -EAGAIN && got_bytes))
4285                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4286
4287                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4288
4289                 put_page(page);
4290                 rdata->pages[i] = NULL;
4291         }
4292         kref_put(&rdata->refcount, cifs_readdata_release);
4293 }
4294
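/*
 * Fill the pages attached to @rdata with up to @len bytes of response
 * data. The source is either @iter (data already received and, e.g.,
 * decrypted), the SMB Direct MR (data already placed by RDMA), or the
 * socket. A partial tail page is zero-filled past the valid data; pages
 * past the server's apparent EOF are zeroed and marked uptodate, and any
 * other leftover pages are released from the request.
 */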
4295 static int
4296 readpages_fill_pages(struct TCP_Server_Info *server,
4297                      struct cifs_readdata *rdata, struct iov_iter *iter,
4298                      unsigned int len)
4299 {
4300         int result = 0;
4301         unsigned int i;
4302         u64 eof;
4303         pgoff_t eof_index;
4304         unsigned int nr_pages = rdata->nr_pages;
4305         unsigned int page_offset = rdata->page_offset;
4306
4307         /* determine the eof that the server (probably) has */
4308         eof = CIFS_I(rdata->mapping->host)->server_eof;
4309         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4310         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4311
4312         rdata->got_bytes = 0;
4313         rdata->tailsz = PAGE_SIZE;
4314         for (i = 0; i < nr_pages; i++) {
4315                 struct page *page = rdata->pages[i];
4316                 unsigned int to_read = rdata->pagesz;
4317                 size_t n;
4318
4319                 if (i == 0)
4320                         to_read -= page_offset;
4321                 else
4322                         page_offset = 0;
4323
4324                 n = to_read;
4325
4326                 if (len >= to_read) {
4327                         len -= to_read;
4328                 } else if (len > 0) {
4329                         /* enough for partial page, fill and zero the rest */
4330                         zero_user(page, len + page_offset, to_read - len);
4331                         n = rdata->tailsz = len;
4332                         len = 0;
4333                 } else if (page->index > eof_index) {
4334                         /*
4335                          * The VFS will not try to do readahead past the
4336                          * i_size, but it's possible that we have outstanding
4337                          * writes with gaps in the middle and the i_size hasn't
4338                          * caught up yet. Populate those with zeroed out pages
4339                          * to prevent the VFS from repeatedly attempting to
4340                          * fill them until the writes are flushed.
4341                          */
4342                         zero_user(page, 0, PAGE_SIZE);
4343                         lru_cache_add(page);
4344                         flush_dcache_page(page);
4345                         SetPageUptodate(page);
4346                         unlock_page(page);
4347                         put_page(page);
4348                         rdata->pages[i] = NULL;
4349                         rdata->nr_pages--;
4350                         continue;
4351                 } else {
4352                         /* no need to hold page hostage */
4353                         lru_cache_add(page);
4354                         unlock_page(page);
4355                         put_page(page);
4356                         rdata->pages[i] = NULL;
4357                         rdata->nr_pages--;
4358                         continue;
4359                 }
4360
4361                 if (iter)
4362                         result = copy_page_from_iter(
4363                                         page, page_offset, n, iter);
4364 #ifdef CONFIG_CIFS_SMB_DIRECT
4365                 else if (rdata->mr)
4366                         result = n;
4367 #endif
4368                 else
4369                         result = cifs_read_page_from_socket(
4370                                         server, page, page_offset, n);
4371                 if (result < 0)
4372                         break;
4373
4374                 rdata->got_bytes += result;
4375         }
4376
4377         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4378                                                 rdata->got_bytes : result;
4379 }
4380
4381 static int
4382 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4383                                struct cifs_readdata *rdata, unsigned int len)
4384 {
4385         return readpages_fill_pages(server, rdata, NULL, len);
4386 }
4387
4388 static int
4389 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4390                                struct cifs_readdata *rdata,
4391                                struct iov_iter *iter)
4392 {
4393         return readpages_fill_pages(server, rdata, iter, iter->count);
4394 }
4395
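/*
 * Peel pages off the tail of @page_list into @tmplist to build a single
 * contiguous read request: keep adding pages while their indexes are
 * consecutive and the total size stays within @rsize. The resulting
 * offset, byte count and page count are returned via the out parameters;
 * the return value is that of the last attempted page-cache insertion.
 */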
4396 static int
4397 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4398                     unsigned int rsize, struct list_head *tmplist,
4399                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4400 {
4401         struct page *page, *tpage;
4402         unsigned int expected_index;
4403         int rc;
4404         gfp_t gfp = readahead_gfp_mask(mapping);
4405
4406         INIT_LIST_HEAD(tmplist);
4407
4408         page = lru_to_page(page_list);
4409
4410         /*
4411          * Lock the page and put it in the cache. Since no one else
4412          * should have access to this page, we're safe to simply set
4413          * PG_locked without checking it first.
4414          */
4415         __SetPageLocked(page);
4416         rc = add_to_page_cache_locked(page, mapping,
4417                                       page->index, gfp);
4418
4419         /* give up if we can't stick it in the cache */
4420         if (rc) {
4421                 __ClearPageLocked(page);
4422                 return rc;
4423         }
4424
4425         /* move first page to the tmplist */
4426         *offset = (loff_t)page->index << PAGE_SHIFT;
4427         *bytes = PAGE_SIZE;
4428         *nr_pages = 1;
4429         list_move_tail(&page->lru, tmplist);
4430
4431         /* now try and add more pages onto the request */
4432         expected_index = page->index + 1;
4433         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4434                 /* discontinuity? */
4435                 if (page->index != expected_index)
4436                         break;
4437
4438                 /* would this page push the read over the rsize? */
4439                 if (*bytes + PAGE_SIZE > rsize)
4440                         break;
4441
4442                 __SetPageLocked(page);
4443                 rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4444                 if (rc) {
4445                         __ClearPageLocked(page);
4446                         break;
4447                 }
4448                 list_move_tail(&page->lru, tmplist);
4449                 (*bytes) += PAGE_SIZE;
4450                 expected_index++;
4451                 (*nr_pages)++;
4452         }
4453         return rc;
4454 }
4455
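/*
 * ->readpages() implementation: after giving fscache a chance to satisfy
 * the read, repeatedly reserve credits (wait_mtu_credits), carve a batch
 * of contiguous pages off the list via readpages_get_pages(), and submit
 * it with ->async_readv(); completion is handled in cifs_readv_complete().
 * On submission failure the pages are unlocked and released so the VFS
 * can fall back to ->readpage().
 */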
4456 static int cifs_readpages(struct file *file, struct address_space *mapping,
4457         struct list_head *page_list, unsigned num_pages)
4458 {
4459         int rc;
4460         int err = 0;
4461         struct list_head tmplist;
4462         struct cifsFileInfo *open_file = file->private_data;
4463         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4464         struct TCP_Server_Info *server;
4465         pid_t pid;
4466         unsigned int xid;
4467
4468         xid = get_xid();
4469         /*
4470          * Read as many pages as possible from fscache. Returns -ENOBUFS
4471          * immediately if the cookie is negative.
4472          *
4473          * After this point, every page in the list might have PG_fscache set,
4474          * so we will need to clear that on every page we don't use.
4475          */
4476         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4477                                          &num_pages);
4478         if (rc == 0) {
4479                 free_xid(xid);
4480                 return rc;
4481         }
4482
4483         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4484                 pid = open_file->pid;
4485         else
4486                 pid = current->tgid;
4487
4488         rc = 0;
4489         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4490
4491         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4492                  __func__, file, mapping, num_pages);
4493
4494         /*
4495          * Start with the page at end of list and move it to private
4496          * list. Do the same with any following pages until we hit
4497          * the rsize limit, hit an index discontinuity, or run out of
4498          * pages. Issue the async read and then start the loop again
4499          * until the list is empty.
4500          *
4501          * Note that list order is important. The page_list is in
4502          * the order of declining indexes. When we put the pages in
4503          * the rdata->pages, then we want them in increasing order.
4504          */
4505         while (!list_empty(page_list) && !err) {
4506                 unsigned int i, nr_pages, bytes, rsize;
4507                 loff_t offset;
4508                 struct page *page, *tpage;
4509                 struct cifs_readdata *rdata;
4510                 struct cifs_credits credits_on_stack;
4511                 struct cifs_credits *credits = &credits_on_stack;
4512
4513                 if (open_file->invalidHandle) {
4514                         rc = cifs_reopen_file(open_file, true);
4515                         if (rc == -EAGAIN)
4516                                 continue;
4517                         else if (rc)
4518                                 break;
4519                 }
4520
4521                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4522                                                    &rsize, credits);
4523                 if (rc)
4524                         break;
4525
4526                 /*
4527                  * Give up immediately if rsize is too small to read an entire
4528                  * page. The VFS will fall back to readpage. We should never
4529                  * reach this point however since we set ra_pages to 0 when the
4530                  * rsize is smaller than a cache page.
4531                  */
4532                 if (unlikely(rsize < PAGE_SIZE)) {
4533                         add_credits_and_wake_if(server, credits, 0);
4534                         free_xid(xid);
4535                         return 0;
4536                 }
4537
4538                 nr_pages = 0;
4539                 err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4540                                          &nr_pages, &offset, &bytes);
4541                 if (!nr_pages) {
4542                         add_credits_and_wake_if(server, credits, 0);
4543                         break;
4544                 }
4545
4546                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4547                 if (!rdata) {
4548                         /* best to give up if we're out of mem */
4549                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4550                                 list_del(&page->lru);
4551                                 lru_cache_add(page);
4552                                 unlock_page(page);
4553                                 put_page(page);
4554                         }
4555                         rc = -ENOMEM;
4556                         add_credits_and_wake_if(server, credits, 0);
4557                         break;
4558                 }
4559
4560                 rdata->cfile = cifsFileInfo_get(open_file);
4561                 rdata->server = server;
4562                 rdata->mapping = mapping;
4563                 rdata->offset = offset;
4564                 rdata->bytes = bytes;
4565                 rdata->pid = pid;
4566                 rdata->pagesz = PAGE_SIZE;
4567                 rdata->tailsz = PAGE_SIZE;
4568                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4569                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4570                 rdata->credits = credits_on_stack;
4571
4572                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4573                         list_del(&page->lru);
4574                         rdata->pages[rdata->nr_pages++] = page;
4575                 }
4576
4577                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4578
4579                 if (!rc) {
4580                         if (rdata->cfile->invalidHandle)
4581                                 rc = -EAGAIN;
4582                         else
4583                                 rc = server->ops->async_readv(rdata);
4584                 }
4585
4586                 if (rc) {
4587                         add_credits_and_wake_if(server, &rdata->credits, 0);
4588                         for (i = 0; i < rdata->nr_pages; i++) {
4589                                 page = rdata->pages[i];
4590                                 lru_cache_add(page);
4591                                 unlock_page(page);
4592                                 put_page(page);
4593                         }
4594                         /* Fallback to the readpage in error/reconnect cases */
4595                         kref_put(&rdata->refcount, cifs_readdata_release);
4596                         break;
4597                 }
4598
4599                 kref_put(&rdata->refcount, cifs_readdata_release);
4600         }
4601
4602         free_xid(xid);
4603         return rc;
4604 }
4605
4606 /*
4607  * cifs_readpage_worker must be called with the page pinned
4608  */
4609 static int cifs_readpage_worker(struct file *file, struct page *page,
4610         loff_t *poffset)
4611 {
4612         char *read_data;
4613         int rc;
4614
4615         /* Is the page cached? */
4616         rc = cifs_readpage_from_fscache(file_inode(file), page);
4617         if (rc == 0)
4618                 goto read_complete;
4619
4620         read_data = kmap(page);
4621         /* for reads over a certain size we could initiate async read ahead */
4622
4623         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4624
4625         if (rc < 0)
4626                 goto io_error;
4627         else
4628                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4629
4630         /* we do not want atime to be less than mtime, it broke some apps */
4631         file_inode(file)->i_atime = current_time(file_inode(file));
4632         if (timespec64_compare(&(file_inode(file)->i_atime),
4633                                &(file_inode(file)->i_mtime)) < 0)
4634                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4636
4637         if (PAGE_SIZE > rc)
4638                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4639
4640         flush_dcache_page(page);
4641         SetPageUptodate(page);
4642
4643         /* send this page to the cache */
4644         cifs_readpage_to_fscache(file_inode(file), page);
4645
4646         rc = 0;
4647
4648 io_error:
4649         kunmap(page);
4650         unlock_page(page);
4651
4652 read_complete:
4653         return rc;
4654 }
4655
4656 static int cifs_readpage(struct file *file, struct page *page)
4657 {
4658         loff_t offset = page_file_offset(page);
4659         int rc = -EACCES;
4660         unsigned int xid;
4661
4662         xid = get_xid();
4663
4664         if (file->private_data == NULL) {
4665                 rc = -EBADF;
4666                 free_xid(xid);
4667                 return rc;
4668         }
4669
4670         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4671                  page, (int)offset, (int)offset);
4672
4673         rc = cifs_readpage_worker(file, page, &offset);
4674
4675         free_xid(xid);
4676         return rc;
4677 }
4678
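/* Return 1 if any open file handle on the inode was opened for writing. */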
4679 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4680 {
4681         struct cifsFileInfo *open_file;
4682
4683         spin_lock(&cifs_inode->open_file_lock);
4684         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4685                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4686                         spin_unlock(&cifs_inode->open_file_lock);
4687                         return 1;
4688                 }
4689         }
4690         spin_unlock(&cifs_inode->open_file_lock);
4691         return 0;
4692 }
4693
4694 /* We do not want to update the file size from the server for inodes
4695    open for write - to avoid races with writepage extending the file.
4696    In the future we could consider refreshing the inode only on
4697    increases in the file size, but that is tricky to do without racing
4698    with writebehind page caching in the current Linux kernel
4699    design. */
4700 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4701 {
4702         if (!cifsInode)
4703                 return true;
4704
4705         if (is_inode_writable(cifsInode)) {
4706                 /* This inode is open for write at least once */
4707                 struct cifs_sb_info *cifs_sb;
4708
4709                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4710                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4711                         /* since there is no page cache to corrupt on
4712                            directio, we can change the size safely */
4713                         return true;
4714                 }
4715
4716                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4717                         return true;
4718
4719                 return false;
4720         } else
4721                 return true;
4722 }
4723
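/*
 * ->write_begin(): grab and lock the target page. We can skip reading it
 * from the server when it is already uptodate, when the write covers the
 * whole page, or when we hold a read oplock and the write lies at or
 * beyond EOF (the untouched parts are zeroed instead). Otherwise read it
 * in once via cifs_readpage_worker(); if that read fails, cifs_write_end()
 * falls back to a sync write since PG_uptodate will not be set.
 */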
4724 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4725                         loff_t pos, unsigned len, unsigned flags,
4726                         struct page **pagep, void **fsdata)
4727 {
4728         int oncethru = 0;
4729         pgoff_t index = pos >> PAGE_SHIFT;
4730         loff_t offset = pos & (PAGE_SIZE - 1);
4731         loff_t page_start = pos & PAGE_MASK;
4732         loff_t i_size;
4733         struct page *page;
4734         int rc = 0;
4735
4736         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4737
4738 start:
4739         page = grab_cache_page_write_begin(mapping, index, flags);
4740         if (!page) {
4741                 rc = -ENOMEM;
4742                 goto out;
4743         }
4744
4745         if (PageUptodate(page))
4746                 goto out;
4747
4748         /*
4749          * If we write a full page it will be up to date, no need to read from
4750          * the server. If the write is short, we'll end up doing a sync write
4751          * instead.
4752          */
4753         if (len == PAGE_SIZE)
4754                 goto out;
4755
4756         /*
4757          * optimize away the read when we have an oplock, and we're not
4758          * expecting to use any of the data we'd be reading in. That
4759          * is, when the page lies beyond the EOF, or straddles the EOF
4760          * and the write will cover all of the existing data.
4761          */
4762         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4763                 i_size = i_size_read(mapping->host);
4764                 if (page_start >= i_size ||
4765                     (offset == 0 && (pos + len) >= i_size)) {
4766                         zero_user_segments(page, 0, offset,
4767                                            offset + len,
4768                                            PAGE_SIZE);
4769                         /*
4770                          * PageChecked means that the parts of the page
4771                          * to which we're not writing are considered up
4772                          * to date. Once the data is copied to the
4773                          * page, it can be set uptodate.
4774                          */
4775                         SetPageChecked(page);
4776                         goto out;
4777                 }
4778         }
4779
4780         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4781                 /*
4782                  * might as well read a page, it is fast enough. If we get
4783                  * an error, we don't need to return it. cifs_write_end will
4784                  * do a sync write instead since PG_uptodate isn't set.
4785                  */
4786                 cifs_readpage_worker(file, page, &page_start);
4787                 put_page(page);
4788                 oncethru = 1;
4789                 goto start;
4790         } else {
4791                 /* we could try using another file handle if there is one -
4792                    but how would we lock it to prevent close of that handle
4793                    racing with this read? In any case
4794                    this will be written out by write_end so is fine */
4795         }
4796 out:
4797         *pagep = page;
4798         return rc;
4799 }
4800
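/*
 * ->releasepage(): say whether the page can be freed. Refuse while
 * private data is attached, or (for kswapd and non-__GFP_FS allocations)
 * while fscache is still writing the page out.
 */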
4801 static int cifs_release_page(struct page *page, gfp_t gfp)
4802 {
4803         if (PagePrivate(page))
4804                 return 0;
4805         if (PageFsCache(page)) {
4806                 if (current_is_kswapd() || !(gfp & __GFP_FS))
4807                         return 0;
4808                 wait_on_page_fscache(page);
4809         }
4810         fscache_note_page_release(cifs_inode_cookie(page->mapping->host));
4811         return 1;
4812 }
4813
4814 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4815                                  unsigned int length)
4816 {
4817         wait_on_page_fscache(page);
4818 }
4819
4820 static int cifs_launder_page(struct page *page)
4821 {
4822         int rc = 0;
4823         loff_t range_start = page_offset(page);
4824         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4825         struct writeback_control wbc = {
4826                 .sync_mode = WB_SYNC_ALL,
4827                 .nr_to_write = 0,
4828                 .range_start = range_start,
4829                 .range_end = range_end,
4830         };
4831
4832         cifs_dbg(FYI, "Launder page: %p\n", page);
4833
4834         if (clear_page_dirty_for_io(page))
4835                 rc = cifs_writepage_locked(page, &wbc);
4836
4837         wait_on_page_fscache(page);
4838         return rc;
4839 }
4840
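/*
 * Work item run when the server sends an oplock/lease break. Waits for
 * pending writers, downgrades the cached oplock state, flushes (and, if
 * read caching was lost, invalidates) the page cache, and pushes cached
 * byte-range locks to the server. If a deferred close is pending, its
 * work is cancelled and the handle put; otherwise the break is
 * acknowledged unless it was already cancelled by a reconnect.
 */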
4841 void cifs_oplock_break(struct work_struct *work)
4842 {
4843         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4844                                                   oplock_break);
4845         struct inode *inode = d_inode(cfile->dentry);
4846         struct cifsInodeInfo *cinode = CIFS_I(inode);
4847         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4848         struct TCP_Server_Info *server = tcon->ses->server;
4849         int rc = 0;
4850         bool purge_cache = false;
4851         bool is_deferred = false;
4852         struct cifs_deferred_close *dclose;
4853
4854         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4855                         TASK_UNINTERRUPTIBLE);
4856
4857         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4858                                       cfile->oplock_epoch, &purge_cache);
4859
4860         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4861                                                 cifs_has_mand_locks(cinode)) {
4862                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4863                          inode);
4864                 cinode->oplock = 0;
4865         }
4866
4867         if (inode && S_ISREG(inode->i_mode)) {
4868                 if (CIFS_CACHE_READ(cinode))
4869                         break_lease(inode, O_RDONLY);
4870                 else
4871                         break_lease(inode, O_WRONLY);
4872                 rc = filemap_fdatawrite(inode->i_mapping);
4873                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4874                         rc = filemap_fdatawait(inode->i_mapping);
4875                         mapping_set_error(inode->i_mapping, rc);
4876                         cifs_zap_mapping(inode);
4877                 }
4878                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4879                 if (CIFS_CACHE_WRITE(cinode))
4880                         goto oplock_break_ack;
4881         }
4882
4883         rc = cifs_push_locks(cfile);
4884         if (rc)
4885                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4886
4887 oplock_break_ack:
4888         /*
4889          * When an oplock break is received and there are no active file
4890          * handles, only cached ones, schedule the deferred close immediately
4891          * so that a new open will not reuse the cached handle.
4892          */
4893         spin_lock(&CIFS_I(inode)->deferred_lock);
4894         is_deferred = cifs_is_deferred_close(cfile, &dclose);
4895         spin_unlock(&CIFS_I(inode)->deferred_lock);
4896         if (is_deferred &&
4897             cfile->deferred_close_scheduled &&
4898             delayed_work_pending(&cfile->deferred)) {
4899                 if (cancel_delayed_work(&cfile->deferred)) {
4900                         _cifsFileInfo_put(cfile, false, false);
4901                         goto oplock_break_done;
4902                 }
4903         }
4904         /*
4905          * Releasing a stale oplock after a recent reconnect of the SMB
4906          * session, using a now incorrect file handle, is not a data
4907          * integrity issue, but do not bother sending an oplock release if
4908          * the session is still disconnected: the server already released it.
4909          */
4910         if (!cfile->oplock_break_cancelled) {
4911                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4912                                                              cinode);
4913                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4914         }
4915 oplock_break_done:
4916         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4917         cifs_done_oplock_break(cinode);
4918 }
4919
4920 /*
4921  * The presence of cifs_direct_io() in the address space ops vector
4922  * allows open() O_DIRECT flags which would have failed otherwise.
4923  *
4924  * In the non-cached mode (mount with cache=none), we shunt off direct
4925  * read and write requests so this method should never be called.
4926  *
4927  * Direct IO is not yet supported in the cached mode.
4928  */
4929 static ssize_t
4930 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4931 {
4932         /*
4933          * FIXME
4934          * Eventually need to support direct IO for non forcedirectio mounts
4935          */
4936         return -EINVAL;
4937 }
4938
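/*
 * Activate a swap file on a CIFS mount. The block count vs. i_size check
 * rejects files with holes, since a sparse swapfile cannot be swapped to
 * safely; as the warning below notes, support is still experimental.
 */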
4939 static int cifs_swap_activate(struct swap_info_struct *sis,
4940                               struct file *swap_file, sector_t *span)
4941 {
4942         struct cifsFileInfo *cfile = swap_file->private_data;
4943         struct inode *inode = swap_file->f_mapping->host;
4944         unsigned long blocks;
4945         long long isize;
4946
4947         cifs_dbg(FYI, "swap activate\n");
4948
4949         spin_lock(&inode->i_lock);
4950         blocks = inode->i_blocks;
4951         isize = inode->i_size;
4952         spin_unlock(&inode->i_lock);
4953         if (blocks * 512 < isize) {
4954                 pr_warn("swap activate: swapfile has holes\n");
4955                 return -EINVAL;
4956         }
4957         *span = sis->pages;
4958
4959         pr_warn_once("Swap support over SMB3 is experimental\n");
4960
4961         /*
4962          * TODO: consider adding ACL (or documenting how) to prevent other
4963          * users (on this or other systems) from reading it
4964          */
4965
4967         /* TODO: add sk_set_memalloc(inet) or similar */
4968
4969         if (cfile)
4970                 cfile->swapfile = true;
4971         /*
4972          * TODO: Since file already open, we can't open with DENY_ALL here
4973          * but we could add call to grab a byte range lock to prevent others
4974          * from reading or writing the file
4975          */
4976
4977         return 0;
4978 }
4979
4980 static void cifs_swap_deactivate(struct file *file)
4981 {
4982         struct cifsFileInfo *cfile = file->private_data;
4983
4984         cifs_dbg(FYI, "swap deactivate\n");
4985
4986         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4987
4988         if (cfile)
4989                 cfile->swapfile = false;
4990
4991         /* do we need to unpin (or unlock) the file? */
4992 }
4993
4994 /*
4995  * Mark a page as having been made dirty and thus needing writeback.  We also
4996  * need to pin the cache object to write back to.
4997  */
4998 #ifdef CONFIG_CIFS_FSCACHE
4999 static int cifs_set_page_dirty(struct page *page)
5000 {
5001         return fscache_set_page_dirty(page, cifs_inode_cookie(page->mapping->host));
5002 }
5003 #else
5004 #define cifs_set_page_dirty __set_page_dirty_nobuffers
5005 #endif
5006
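/*
 * Default address space operations, used when the server can handle
 * reads of at least a full page; see cifs_addr_ops_smallbuf below for
 * the fallback table without ->readpages.
 */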
5007 const struct address_space_operations cifs_addr_ops = {
5008         .readpage = cifs_readpage,
5009         .readpages = cifs_readpages,
5010         .writepage = cifs_writepage,
5011         .writepages = cifs_writepages,
5012         .write_begin = cifs_write_begin,
5013         .write_end = cifs_write_end,
5014         .set_page_dirty = cifs_set_page_dirty,
5015         .releasepage = cifs_release_page,
5016         .direct_IO = cifs_direct_io,
5017         .invalidatepage = cifs_invalidate_page,
5018         .launder_page = cifs_launder_page,
5019         /*
5020          * TODO: investigate and if useful we could add a cifs_migratePage
5021          * helper (under CONFIG_MIGRATION) in the future, and also
5022          * investigate and add an is_dirty_writeback helper if needed
5023          */
5024         .swap_activate = cifs_swap_activate,
5025         .swap_deactivate = cifs_swap_deactivate,
5026 };
5027
5028 /*
5029  * cifs_readpages requires the server to support a buffer large enough to
5030  * contain the header plus one complete page of data.  Otherwise, we need
5031  * to leave cifs_readpages out of the address space operations.
5032  */
5033 const struct address_space_operations cifs_addr_ops_smallbuf = {
5034         .readpage = cifs_readpage,
5035         .writepage = cifs_writepage,
5036         .writepages = cifs_writepages,
5037         .write_begin = cifs_write_begin,
5038         .write_end = cifs_write_end,
5039         .set_page_dirty = cifs_set_page_dirty,
5040         .releasepage = cifs_release_page,
5041         .invalidatepage = cifs_invalidate_page,
5042         .launder_page = cifs_launder_page,
5043 };