fs/nfs/pnfs.c
/*
 *  pNFS functions to call and manage layout drivers.
 *
 *  Copyright (c) 2002 [year of first publication]
 *  The Regents of the University of Michigan
 *  All Rights Reserved
 *
 *  Dean Hildebrand <dhildebz@umich.edu>
 *
 *  Permission is granted to use, copy, create derivative works, and
 *  redistribute this software and such derivative works for any purpose,
 *  so long as the name of the University of Michigan is not used in
 *  any advertising or publicity pertaining to the use or distribution
 *  of this software without specific, written prior authorization. If
 *  the above copyright notice or any other identification of the
 *  University of Michigan is included in any copy of any portion of
 *  this software, then the disclaimer below must also be included.
 *
 *  This software is provided as is, without representation or warranty
 *  of any kind either express or implied, including without limitation
 *  the implied warranties of merchantability, fitness for a particular
 *  purpose, or noninfringement.  The Regents of the University of
 *  Michigan shall not be liable for any damages, including special,
 *  indirect, incidental, or consequential damages, with respect to any
 *  claim arising out of or in connection with the use of the software,
 *  even if it has been or is hereafter advised of the possibility of
 *  such damages.
 */

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
#include <linux/sort.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"
#include "nfs4trace.h"
#include "delegation.h"
#include "nfs42.h"
#include "nfs4_fs.h"

#define NFSDBG_FACILITY         NFSDBG_PNFS
#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)

/* Locking:
 *
 * pnfs_spinlock:
 *      protects pnfs_modules_tbl.
 */
static DEFINE_SPINLOCK(pnfs_spinlock);

/*
 * pnfs_modules_tbl holds all pnfs modules
 */
static LIST_HEAD(pnfs_modules_tbl);

static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);
static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
                struct list_head *free_me,
                const struct pnfs_layout_range *range,
                u32 seq);
static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
                                struct list_head *tmp_list);

/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
find_pnfs_driver_locked(u32 id)
{
        struct pnfs_layoutdriver_type *local;

        list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
                if (local->id == id)
                        goto out;
        local = NULL;
out:
        dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
        return local;
}

static struct pnfs_layoutdriver_type *
find_pnfs_driver(u32 id)
{
        struct pnfs_layoutdriver_type *local;

        spin_lock(&pnfs_spinlock);
        local = find_pnfs_driver_locked(id);
        if (local != NULL && !try_module_get(local->owner)) {
                dprintk("%s: Could not grab reference on module\n", __func__);
                local = NULL;
        }
        spin_unlock(&pnfs_spinlock);
        return local;
}

void
unset_pnfs_layoutdriver(struct nfs_server *nfss)
{
        if (nfss->pnfs_curr_ld) {
                if (nfss->pnfs_curr_ld->clear_layoutdriver)
                        nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
                /* Decrement the MDS count. Purge the deviceid cache if zero */
                if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
                        nfs4_deviceid_purge_client(nfss->nfs_client);
                module_put(nfss->pnfs_curr_ld->owner);
        }
        nfss->pnfs_curr_ld = NULL;
}

/*
 * When the server sends a list of layout types, we choose one in the order
 * given in the list below.
 *
 * FIXME: should this list be configurable in some fashion? module param?
 *        mount option? something else?
 */
static const u32 ld_prefs[] = {
        LAYOUT_SCSI,
        LAYOUT_BLOCK_VOLUME,
        LAYOUT_OSD2_OBJECTS,
        LAYOUT_FLEX_FILES,
        LAYOUT_NFSV4_1_FILES,
        0
};

static int
ld_cmp(const void *e1, const void *e2)
{
        u32 ld1 = *((u32 *)e1);
        u32 ld2 = *((u32 *)e2);
        int i;

        for (i = 0; ld_prefs[i] != 0; i++) {
                if (ld1 == ld_prefs[i])
                        return -1;

                if (ld2 == ld_prefs[i])
                        return 1;
        }
        return 0;
}

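/*
 * Illustrative example: if the MDS advertises
 * { LAYOUT_NFSV4_1_FILES, LAYOUT_SCSI }, sorting with ld_cmp() yields
 * { LAYOUT_SCSI, LAYOUT_NFSV4_1_FILES }, matching the preference order
 * encoded in ld_prefs[] above.
 */
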
/*
 * Try to set the server's pnfs module to the pnfs layout type specified by id.
 * Currently only one pNFS layout driver per filesystem is supported.
 *
 * @fsinfo: fsinfo from the MDS, carrying the array of layout types it supports.
 */
void
set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
                      struct nfs_fsinfo *fsinfo)
{
        struct pnfs_layoutdriver_type *ld_type = NULL;
        u32 id;
        int i;

        if (fsinfo->nlayouttypes == 0)
                goto out_no_driver;
        if (!(server->nfs_client->cl_exchange_flags &
                 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
                printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n",
                        __func__, server->nfs_client->cl_exchange_flags);
                goto out_no_driver;
        }

        sort(fsinfo->layouttype, fsinfo->nlayouttypes,
                sizeof(*fsinfo->layouttype), ld_cmp, NULL);

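        /*
         * Try each layout type in the preference order established by
         * the sort above.  If the matching driver is not yet loaded,
         * request_module() asks for it by alias; illustratively (an
         * assumption about module naming, not taken from this file),
         * id LAYOUT_NFSV4_1_FILES would resolve to "nfs-layouttype4-1".
         */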
        for (i = 0; i < fsinfo->nlayouttypes; i++) {
                id = fsinfo->layouttype[i];
                ld_type = find_pnfs_driver(id);
                if (!ld_type) {
                        request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX,
                                        id);
                        ld_type = find_pnfs_driver(id);
                }
                if (ld_type)
                        break;
        }

        if (!ld_type) {
                dprintk("%s: No pNFS module found!\n", __func__);
                goto out_no_driver;
        }

        server->pnfs_curr_ld = ld_type;
        if (ld_type->set_layoutdriver
            && ld_type->set_layoutdriver(server, mntfh)) {
                printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
                        "driver %u.\n", __func__, id);
                module_put(ld_type->owner);
                goto out_no_driver;
        }
        /* Bump the MDS count */
        atomic_inc(&server->nfs_client->cl_mds_count);

        dprintk("%s: pNFS module for %u set\n", __func__, id);
        return;

out_no_driver:
        dprintk("%s: Using NFSv4 I/O\n", __func__);
        server->pnfs_curr_ld = NULL;
}

int
pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
        int status = -EINVAL;
        struct pnfs_layoutdriver_type *tmp;

        if (ld_type->id == 0) {
                printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
                return status;
        }
        if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
                printk(KERN_ERR "NFS: %s Layout driver must provide "
                       "alloc_lseg and free_lseg.\n", __func__);
                return status;
        }

        spin_lock(&pnfs_spinlock);
        tmp = find_pnfs_driver_locked(ld_type->id);
        if (!tmp) {
                list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
                status = 0;
                dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
                        ld_type->name);
        } else {
                printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
                        __func__, ld_type->id);
        }
        spin_unlock(&pnfs_spinlock);

        return status;
}
EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);

void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
        dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
        spin_lock(&pnfs_spinlock);
        list_del(&ld_type->pnfs_tblid);
        spin_unlock(&pnfs_spinlock);
}
EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);

/*
 * pNFS client layout cache
 */

/* Need to hold i_lock if caller does not already hold reference */
void
pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
{
        refcount_inc(&lo->plh_refcount);
}

static struct pnfs_layout_hdr *
pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
{
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
        return ld->alloc_layout_hdr(ino, gfp_flags);
}

static void
pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
        struct nfs_server *server = NFS_SERVER(lo->plh_inode);
        struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;

        if (!list_empty(&lo->plh_layouts)) {
                struct nfs_client *clp = server->nfs_client;

                spin_lock(&clp->cl_lock);
                list_del_init(&lo->plh_layouts);
                spin_unlock(&clp->cl_lock);
        }
        put_rpccred(lo->plh_lc_cred);
        return ld->free_layout_hdr(lo);
}

static void
pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
{
        struct nfs_inode *nfsi = NFS_I(lo->plh_inode);
        dprintk("%s: freeing layout cache %p\n", __func__, lo);
        nfsi->layout = NULL;
        /* Reset MDS Threshold I/O counters */
        nfsi->write_io = 0;
        nfsi->read_io = 0;
}

void
pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
        struct inode *inode;

        if (!lo)
                return;
        inode = lo->plh_inode;
        pnfs_layoutreturn_before_put_layout_hdr(lo);

        if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
                if (!list_empty(&lo->plh_segs))
                        WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
                pnfs_detach_layout_hdr(lo);
                spin_unlock(&inode->i_lock);
                pnfs_free_layout_hdr(lo);
        }
}

static void
pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
                         u32 seq)
{
        if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode)
                iomode = IOMODE_ANY;
        lo->plh_return_iomode = iomode;
        set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
        if (seq != 0) {
                WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq);
                lo->plh_return_seq = seq;
        }
}

static void
pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
{
        struct pnfs_layout_segment *lseg;
        lo->plh_return_iomode = 0;
        lo->plh_return_seq = 0;
        clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
        list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
                if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
                        continue;
                pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
        }
}

static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
{
        clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
        clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags);
        smp_mb__after_atomic();
        wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
        rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
}

static void
pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
                struct list_head *free_me)
{
        clear_bit(NFS_LSEG_ROC, &lseg->pls_flags);
        clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
        if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags))
                pnfs_lseg_dec_and_remove_zero(lseg, free_me);
        if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
                pnfs_lseg_dec_and_remove_zero(lseg, free_me);
}

/*
 * Update the seqid of a layout stateid
 */
bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode)
{
        struct pnfs_layout_hdr *lo;
        bool ret = false;

        spin_lock(&inode->i_lock);
        lo = NFS_I(inode)->layout;
        if (lo && nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
                dst->seqid = lo->plh_stateid.seqid;
                ret = true;
        }
        spin_unlock(&inode->i_lock);
        return ret;
}

/*
 * Mark a pnfs_layout_hdr and all associated layout segments as invalid
 *
 * In order to continue using the pnfs_layout_hdr, a full recovery
 * is required.
 * Note that caller must hold inode->i_lock.
 */
int
pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
                struct list_head *lseg_list)
{
        struct pnfs_layout_range range = {
                .iomode = IOMODE_ANY,
                .offset = 0,
                .length = NFS4_MAX_UINT64,
        };
        struct pnfs_layout_segment *lseg, *next;

        set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
        list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
                pnfs_clear_lseg_state(lseg, lseg_list);
        pnfs_clear_layoutreturn_info(lo);
        pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
        if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
            !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
                pnfs_clear_layoutreturn_waitbit(lo);
        return !list_empty(&lo->plh_segs);
}

static int
pnfs_iomode_to_fail_bit(u32 iomode)
{
        return iomode == IOMODE_RW ?
                NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
}

static void
pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
        lo->plh_retry_timestamp = jiffies;
        if (!test_and_set_bit(fail_bit, &lo->plh_flags))
                refcount_inc(&lo->plh_refcount);
}

static void
pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
        if (test_and_clear_bit(fail_bit, &lo->plh_flags))
                refcount_dec(&lo->plh_refcount);
}

static void
pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
{
        struct inode *inode = lo->plh_inode;
        struct pnfs_layout_range range = {
                .iomode = iomode,
                .offset = 0,
                .length = NFS4_MAX_UINT64,
        };
        LIST_HEAD(head);

        spin_lock(&inode->i_lock);
        pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
        pnfs_mark_matching_lsegs_invalid(lo, &head, &range, 0);
        spin_unlock(&inode->i_lock);
        pnfs_free_lseg_list(&head);
        dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
                        iomode == IOMODE_RW ?  "RW" : "READ");
}

static bool
pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode)
{
        unsigned long start, end;
        int fail_bit = pnfs_iomode_to_fail_bit(iomode);

        if (test_bit(fail_bit, &lo->plh_flags) == 0)
                return false;
        end = jiffies;
        start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT;
        if (!time_in_range(lo->plh_retry_timestamp, start, end)) {
                /* It is time to retry the failed layoutgets */
                pnfs_layout_clear_fail_bit(lo, fail_bit);
                return false;
        }
        return true;
}

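/*
 * Illustrative timeline: pnfs_layout_io_set_failed() stamps
 * plh_retry_timestamp when a layoutget fails, and the check above keeps
 * reporting failure until PNFS_LAYOUTGET_RETRY_TIMEOUT (120 seconds)
 * has elapsed, after which the fail bit is cleared and layoutgets are
 * retried.
 */
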
static void
pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg,
                const struct pnfs_layout_range *range,
                const nfs4_stateid *stateid)
{
        INIT_LIST_HEAD(&lseg->pls_list);
        INIT_LIST_HEAD(&lseg->pls_lc_list);
        refcount_set(&lseg->pls_refcount, 1);
        set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
        lseg->pls_layout = lo;
        lseg->pls_range = *range;
        lseg->pls_seq = be32_to_cpu(stateid->seqid);
}

static void pnfs_free_lseg(struct pnfs_layout_segment *lseg)
{
        if (lseg != NULL) {
                struct inode *inode = lseg->pls_layout->plh_inode;
                NFS_SERVER(inode)->pnfs_curr_ld->free_lseg(lseg);
        }
}

static void
pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
                struct pnfs_layout_segment *lseg)
{
        WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
        list_del_init(&lseg->pls_list);
        /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
        refcount_dec(&lo->plh_refcount);
        if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
                return;
        if (list_empty(&lo->plh_segs) &&
            !test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) &&
            !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
                if (atomic_read(&lo->plh_outstanding) == 0)
                        set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
                clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
        }
}

static bool
pnfs_cache_lseg_for_layoutreturn(struct pnfs_layout_hdr *lo,
                struct pnfs_layout_segment *lseg)
{
        if (test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) &&
            pnfs_layout_is_valid(lo)) {
                pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
                list_move_tail(&lseg->pls_list, &lo->plh_return_segs);
                return true;
        }
        return false;
}

void
pnfs_put_lseg(struct pnfs_layout_segment *lseg)
{
        struct pnfs_layout_hdr *lo;
        struct inode *inode;

        if (!lseg)
                return;

        dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
                refcount_read(&lseg->pls_refcount),
                test_bit(NFS_LSEG_VALID, &lseg->pls_flags));

        lo = lseg->pls_layout;
        inode = lo->plh_inode;

        if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
                if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
                        spin_unlock(&inode->i_lock);
                        return;
                }
                pnfs_get_layout_hdr(lo);
                pnfs_layout_remove_lseg(lo, lseg);
                if (pnfs_cache_lseg_for_layoutreturn(lo, lseg))
                        lseg = NULL;
                spin_unlock(&inode->i_lock);
                pnfs_free_lseg(lseg);
                pnfs_put_layout_hdr(lo);
        }
}
EXPORT_SYMBOL_GPL(pnfs_put_lseg);

/*
 * is l2 fully contained in l1?
 *   start1                             end1
 *   [----------------------------------)
 *           start2           end2
 *           [----------------)
 */
static bool
pnfs_lseg_range_contained(const struct pnfs_layout_range *l1,
                 const struct pnfs_layout_range *l2)
{
        u64 start1 = l1->offset;
        u64 end1 = pnfs_end_offset(start1, l1->length);
        u64 start2 = l2->offset;
        u64 end2 = pnfs_end_offset(start2, l2->length);

        return (start1 <= start2) && (end1 >= end2);
}

static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
                struct list_head *tmp_list)
{
        if (!refcount_dec_and_test(&lseg->pls_refcount))
                return false;
        pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
        list_add(&lseg->pls_list, tmp_list);
        return true;
}

/* Returns 1 if lseg is removed from list, 0 otherwise */
static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
                             struct list_head *tmp_list)
{
        int rv = 0;

        if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
                /* Remove the reference keeping the lseg in the
                 * list.  It will now be removed when all
                 * outstanding io is finished.
                 */
                dprintk("%s: lseg %p ref %d\n", __func__, lseg,
                        refcount_read(&lseg->pls_refcount));
                if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
                        rv = 1;
        }
        return rv;
}

/*
 * Compare 2 layout stateid sequence ids, to see which is newer,
 * taking into account wraparound issues.
 */
static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
{
        return (s32)(s1 - s2) > 0;
}

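/*
 * Worked example (illustrative): with s1 = 2 and s2 = 0xfffffffe, the
 * u32 subtraction s1 - s2 wraps to 4, which is positive as an s32, so
 * s1 is correctly treated as newer across the seqid wraparound.
 */
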
static bool
pnfs_should_free_range(const struct pnfs_layout_range *lseg_range,
                 const struct pnfs_layout_range *recall_range)
{
        return (recall_range->iomode == IOMODE_ANY ||
                lseg_range->iomode == recall_range->iomode) &&
               pnfs_lseg_range_intersecting(lseg_range, recall_range);
}

static bool
pnfs_match_lseg_recall(const struct pnfs_layout_segment *lseg,
                const struct pnfs_layout_range *recall_range,
                u32 seq)
{
        if (seq != 0 && pnfs_seqid_is_newer(lseg->pls_seq, seq))
                return false;
        if (recall_range == NULL)
                return true;
        return pnfs_should_free_range(&lseg->pls_range, recall_range);
}

/**
 * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later
 * @lo: layout header containing the lsegs
 * @tmp_list: list head where doomed lsegs should go
 * @recall_range: optional recall range argument to match (may be NULL)
 * @seq: only invalidate lsegs obtained prior to this sequence (may be 0)
 *
 * Walk the list of lsegs in the layout header, and tear down any that should
 * be destroyed. If "recall_range" is specified then the segment must match
 * that range. If "seq" is non-zero, then only match segments that were handed
 * out at or before that sequence.
 *
 * Returns number of matching invalid lsegs remaining in list after scanning
 * it and purging them.
 */
int
pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
                            struct list_head *tmp_list,
                            const struct pnfs_layout_range *recall_range,
                            u32 seq)
{
        struct pnfs_layout_segment *lseg, *next;
        int remaining = 0;

        dprintk("%s:Begin lo %p\n", __func__, lo);

        if (list_empty(&lo->plh_segs))
                return 0;
        list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
                if (pnfs_match_lseg_recall(lseg, recall_range, seq)) {
                        dprintk("%s: freeing lseg %p iomode %d seq %u "
                                "offset %llu length %llu\n", __func__,
                                lseg, lseg->pls_range.iomode, lseg->pls_seq,
                                lseg->pls_range.offset, lseg->pls_range.length);
                        if (!mark_lseg_invalid(lseg, tmp_list))
                                remaining++;
                }
        dprintk("%s:Return %i\n", __func__, remaining);
        return remaining;
}

static void
pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
                struct list_head *free_me,
                const struct pnfs_layout_range *range,
                u32 seq)
{
        struct pnfs_layout_segment *lseg, *next;

        list_for_each_entry_safe(lseg, next, &lo->plh_return_segs, pls_list) {
                if (pnfs_match_lseg_recall(lseg, range, seq))
                        list_move_tail(&lseg->pls_list, free_me);
        }
}

/* note free_me must contain lsegs from a single layout_hdr */
void
pnfs_free_lseg_list(struct list_head *free_me)
{
        struct pnfs_layout_segment *lseg, *tmp;

        if (list_empty(free_me))
                return;

        list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
                list_del(&lseg->pls_list);
                pnfs_free_lseg(lseg);
        }
}

void
pnfs_destroy_layout(struct nfs_inode *nfsi)
{
        struct pnfs_layout_hdr *lo;
        LIST_HEAD(tmp_list);

        spin_lock(&nfsi->vfs_inode.i_lock);
        lo = nfsi->layout;
        if (lo) {
                pnfs_get_layout_hdr(lo);
                pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
                pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
                pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
                spin_unlock(&nfsi->vfs_inode.i_lock);
                pnfs_free_lseg_list(&tmp_list);
                nfs_commit_inode(&nfsi->vfs_inode, 0);
                pnfs_put_layout_hdr(lo);
        } else
                spin_unlock(&nfsi->vfs_inode.i_lock);
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);

static bool
pnfs_layout_add_bulk_destroy_list(struct inode *inode,
                struct list_head *layout_list)
{
        struct pnfs_layout_hdr *lo;
        bool ret = false;

        spin_lock(&inode->i_lock);
        lo = NFS_I(inode)->layout;
        if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
                pnfs_get_layout_hdr(lo);
                list_add(&lo->plh_bulk_destroy, layout_list);
                ret = true;
        }
        spin_unlock(&inode->i_lock);
        return ret;
}

/* Caller must hold rcu_read_lock and clp->cl_lock */
static int
pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
                struct nfs_server *server,
                struct list_head *layout_list)
{
        struct pnfs_layout_hdr *lo, *next;
        struct inode *inode;

        list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
                if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags))
                        continue;
                inode = igrab(lo->plh_inode);
                if (inode == NULL)
                        continue;
                list_del_init(&lo->plh_layouts);
                if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
                        continue;
                rcu_read_unlock();
                spin_unlock(&clp->cl_lock);
                iput(inode);
                spin_lock(&clp->cl_lock);
                rcu_read_lock();
                return -EAGAIN;
        }
        return 0;
}

static int
pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
                bool is_bulk_recall)
{
        struct pnfs_layout_hdr *lo;
        struct inode *inode;
        LIST_HEAD(lseg_list);
        int ret = 0;

        while (!list_empty(layout_list)) {
                lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
                                plh_bulk_destroy);
                dprintk("%s freeing layout for inode %lu\n", __func__,
                        lo->plh_inode->i_ino);
                inode = lo->plh_inode;

                pnfs_layoutcommit_inode(inode, false);

                spin_lock(&inode->i_lock);
                list_del_init(&lo->plh_bulk_destroy);
                if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
                        if (is_bulk_recall)
                                set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
                        ret = -EAGAIN;
                }
                spin_unlock(&inode->i_lock);
                pnfs_free_lseg_list(&lseg_list);
                /* Free all lsegs that are attached to commit buckets */
                nfs_commit_inode(inode, 0);
                pnfs_put_layout_hdr(lo);
                iput(inode);
        }
        return ret;
}

int
pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
                struct nfs_fsid *fsid,
                bool is_recall)
{
        struct nfs_server *server;
        LIST_HEAD(layout_list);

        spin_lock(&clp->cl_lock);
        rcu_read_lock();
restart:
        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
                if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
                        continue;
                if (pnfs_layout_bulk_destroy_byserver_locked(clp,
                                server,
                                &layout_list) != 0)
                        goto restart;
        }
        rcu_read_unlock();
        spin_unlock(&clp->cl_lock);

        if (list_empty(&layout_list))
                return 0;
        return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
}

int
pnfs_destroy_layouts_byclid(struct nfs_client *clp,
                bool is_recall)
{
        struct nfs_server *server;
        LIST_HEAD(layout_list);

        spin_lock(&clp->cl_lock);
        rcu_read_lock();
restart:
        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
                if (pnfs_layout_bulk_destroy_byserver_locked(clp,
                                        server,
                                        &layout_list) != 0)
                        goto restart;
        }
        rcu_read_unlock();
        spin_unlock(&clp->cl_lock);

        if (list_empty(&layout_list))
                return 0;
        return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
}

/*
 * Called by the state manager to remove all layouts established under an
 * expired lease.
 */
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
        nfs4_deviceid_mark_client_invalid(clp);
        nfs4_deviceid_purge_client(clp);

        pnfs_destroy_layouts_byclid(clp, false);
}

/* update lo->plh_stateid with new if it is more recent */
void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
                        bool update_barrier)
{
        u32 oldseq, newseq, new_barrier = 0;

        oldseq = be32_to_cpu(lo->plh_stateid.seqid);
        newseq = be32_to_cpu(new->seqid);

        if (!pnfs_layout_is_valid(lo)) {
                nfs4_stateid_copy(&lo->plh_stateid, new);
                lo->plh_barrier = newseq;
                pnfs_clear_layoutreturn_info(lo);
                clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
                return;
        }
        if (pnfs_seqid_is_newer(newseq, oldseq)) {
                nfs4_stateid_copy(&lo->plh_stateid, new);
                /*
                 * Because of wraparound, we want to keep the barrier
                 * "close" to the current seqids.
                 */
                new_barrier = newseq - atomic_read(&lo->plh_outstanding);
        }
        if (update_barrier)
                new_barrier = be32_to_cpu(new->seqid);
        else if (new_barrier == 0)
                return;
        if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
                lo->plh_barrier = new_barrier;
}

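/*
 * Worked example (illustrative): if a reply carries seqid 10 while 3
 * LAYOUTGETs are still outstanding, the barrier above becomes 7, so
 * the not-yet-processed replies carrying seqids 8..10 are still
 * accepted by pnfs_layout_stateid_blocked() below.
 */
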
static bool
pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
                const nfs4_stateid *stateid)
{
        u32 seqid = be32_to_cpu(stateid->seqid);

        return !pnfs_seqid_is_newer(seqid, lo->plh_barrier);
}

/* Return true if layoutgets are currently blocked for this layout header */
static bool
pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)
{
        return lo->plh_block_lgets ||
                test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
}

static struct nfs_server *
pnfs_find_server(struct inode *inode, struct nfs_open_context *ctx)
{
        struct nfs_server *server;

        if (inode) {
                server = NFS_SERVER(inode);
        } else {
                struct dentry *parent_dir = dget_parent(ctx->dentry);
                server = NFS_SERVER(parent_dir->d_inode);
                dput(parent_dir);
        }
        return server;
}

static void nfs4_free_pages(struct page **pages, size_t size)
{
        int i;

        if (!pages)
                return;

        for (i = 0; i < size; i++) {
                if (!pages[i])
                        break;
                __free_page(pages[i]);
        }
        kfree(pages);
}

static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
{
        struct page **pages;
        int i;

        pages = kcalloc(size, sizeof(struct page *), gfp_flags);
        if (!pages) {
                dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
                return NULL;
        }

        for (i = 0; i < size; i++) {
                pages[i] = alloc_page(gfp_flags);
                if (!pages[i]) {
                        dprintk("%s: failed to allocate page\n", __func__);
                        nfs4_free_pages(pages, size);
                        return NULL;
                }
        }

        return pages;
}

static struct nfs4_layoutget *
pnfs_alloc_init_layoutget_args(struct inode *ino,
           struct nfs_open_context *ctx,
           const nfs4_stateid *stateid,
           const struct pnfs_layout_range *range,
           gfp_t gfp_flags)
{
        struct nfs_server *server = pnfs_find_server(ino, ctx);
        size_t max_pages = max_response_pages(server);
        struct nfs4_layoutget *lgp;

        dprintk("--> %s\n", __func__);

        lgp = kzalloc(sizeof(*lgp), gfp_flags);
        if (lgp == NULL)
                return NULL;

        lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
        if (!lgp->args.layout.pages) {
                kfree(lgp);
                return NULL;
        }
        lgp->args.layout.pglen = max_pages * PAGE_SIZE;
        lgp->res.layoutp = &lgp->args.layout;

        /* Don't confuse uninitialised result and success */
        lgp->res.status = -NFS4ERR_DELAY;

        lgp->args.minlength = PAGE_SIZE;
        if (lgp->args.minlength > range->length)
                lgp->args.minlength = range->length;
        if (ino) {
                loff_t i_size = i_size_read(ino);

                if (range->iomode == IOMODE_READ) {
                        if (range->offset >= i_size)
                                lgp->args.minlength = 0;
                        else if (i_size - range->offset < lgp->args.minlength)
                                lgp->args.minlength = i_size - range->offset;
                }
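                /*
                 * Illustrative example: for IOMODE_READ with
                 * i_size == 4096 and range->offset == 8192, the clamp
                 * above sets args.minlength to 0, since nothing can be
                 * read beyond end-of-file.
                 */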
        }
        lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
        pnfs_copy_range(&lgp->args.range, range);
        lgp->args.type = server->pnfs_curr_ld->id;
        lgp->args.inode = ino;
        lgp->args.ctx = get_nfs_open_context(ctx);
        nfs4_stateid_copy(&lgp->args.stateid, stateid);
        lgp->gfp_flags = gfp_flags;
        lgp->cred = get_rpccred(ctx->cred);
        lgp->callback_count = raw_seqcount_begin(&server->nfs_client->cl_callback_count);
        return lgp;
}

void pnfs_layoutget_free(struct nfs4_layoutget *lgp)
{
        size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE;

        nfs4_free_pages(lgp->args.layout.pages, max_pages);
        if (lgp->args.inode)
                pnfs_put_layout_hdr(NFS_I(lgp->args.inode)->layout);
        put_rpccred(lgp->cred);
        put_nfs_open_context(lgp->args.ctx);
        kfree(lgp);
}

static void pnfs_clear_layoutcommit(struct inode *inode,
                struct list_head *head)
{
        struct nfs_inode *nfsi = NFS_I(inode);
        struct pnfs_layout_segment *lseg, *tmp;

        if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
                return;
        list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
                if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
                        continue;
                pnfs_lseg_dec_and_remove_zero(lseg, head);
        }
}

void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
                const nfs4_stateid *arg_stateid,
                const struct pnfs_layout_range *range,
                const nfs4_stateid *stateid)
{
        struct inode *inode = lo->plh_inode;
        LIST_HEAD(freeme);

        spin_lock(&inode->i_lock);
        if (!pnfs_layout_is_valid(lo) || !arg_stateid ||
            !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
                goto out_unlock;
        if (stateid) {
                u32 seq = be32_to_cpu(arg_stateid->seqid);

                pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq);
                pnfs_free_returned_lsegs(lo, &freeme, range, seq);
                pnfs_set_layout_stateid(lo, stateid, true);
        } else
                pnfs_mark_layout_stateid_invalid(lo, &freeme);
out_unlock:
        pnfs_clear_layoutreturn_waitbit(lo);
        spin_unlock(&inode->i_lock);
        pnfs_free_lseg_list(&freeme);
}

static bool
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
                nfs4_stateid *stateid,
                enum pnfs_iomode *iomode)
{
        /* Serialise LAYOUTGET/LAYOUTRETURN */
        if (atomic_read(&lo->plh_outstanding) != 0)
                return false;
        if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
                return false;
        set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
        pnfs_get_layout_hdr(lo);
        if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) {
                if (stateid != NULL) {
                        nfs4_stateid_copy(stateid, &lo->plh_stateid);
                        if (lo->plh_return_seq != 0)
                                stateid->seqid = cpu_to_be32(lo->plh_return_seq);
                }
                if (iomode != NULL)
                        *iomode = lo->plh_return_iomode;
                pnfs_clear_layoutreturn_info(lo);
                return true;
        }
        if (stateid != NULL)
                nfs4_stateid_copy(stateid, &lo->plh_stateid);
        if (iomode != NULL)
                *iomode = IOMODE_ANY;
        return true;
}

static void
pnfs_init_layoutreturn_args(struct nfs4_layoutreturn_args *args,
                struct pnfs_layout_hdr *lo,
                const nfs4_stateid *stateid,
                enum pnfs_iomode iomode)
{
        struct inode *inode = lo->plh_inode;

        args->layout_type = NFS_SERVER(inode)->pnfs_curr_ld->id;
        args->inode = inode;
        args->range.iomode = iomode;
        args->range.offset = 0;
        args->range.length = NFS4_MAX_UINT64;
        args->layout = lo;
        nfs4_stateid_copy(&args->stateid, stateid);
}

static int
pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
                       enum pnfs_iomode iomode, bool sync)
{
        struct inode *ino = lo->plh_inode;
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
        struct nfs4_layoutreturn *lrp;
        int status = 0;

        lrp = kzalloc(sizeof(*lrp), GFP_NOFS);
        if (unlikely(lrp == NULL)) {
                status = -ENOMEM;
                spin_lock(&ino->i_lock);
                pnfs_clear_layoutreturn_waitbit(lo);
                spin_unlock(&ino->i_lock);
                pnfs_put_layout_hdr(lo);
                goto out;
        }

        pnfs_init_layoutreturn_args(&lrp->args, lo, stateid, iomode);
        lrp->args.ld_private = &lrp->ld_private;
        lrp->clp = NFS_SERVER(ino)->nfs_client;
        lrp->cred = lo->plh_lc_cred;
        if (ld->prepare_layoutreturn)
                ld->prepare_layoutreturn(&lrp->args);

        status = nfs4_proc_layoutreturn(lrp, sync);
out:
        dprintk("<-- %s status: %d\n", __func__, status);
        return status;
}

/* Return true if layoutreturn is needed */
static bool
pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
{
        struct pnfs_layout_segment *s;

        if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
                return false;

        /* Defer layoutreturn until all lsegs are done */
        list_for_each_entry(s, &lo->plh_segs, pls_list) {
                if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
                        return false;
        }

        return true;
}

static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
        struct inode *inode = lo->plh_inode;

        if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
                return;
        spin_lock(&inode->i_lock);
        if (pnfs_layout_need_return(lo)) {
                nfs4_stateid stateid;
                enum pnfs_iomode iomode;
                bool send;

                send = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
                spin_unlock(&inode->i_lock);
                if (send) {
                        /* Send an async layoutreturn so we don't deadlock */
                        pnfs_send_layoutreturn(lo, &stateid, iomode, false);
                }
        } else
                spin_unlock(&inode->i_lock);
}

/*
 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
 * when the layout segment list is empty.
 *
 * Note that a pnfs_layout_hdr can exist with an empty layout segment
 * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the
 * deviceid is marked invalid.
 */
int
_pnfs_return_layout(struct inode *ino)
{
        struct pnfs_layout_hdr *lo = NULL;
        struct nfs_inode *nfsi = NFS_I(ino);
        LIST_HEAD(tmp_list);
        nfs4_stateid stateid;
        int status = 0;
        bool send, valid_layout;

        dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);

        spin_lock(&ino->i_lock);
        lo = nfsi->layout;
        if (!lo) {
                spin_unlock(&ino->i_lock);
                dprintk("NFS: %s no layout to return\n", __func__);
                goto out;
        }
        /* Reference matched in nfs4_layoutreturn_release */
        pnfs_get_layout_hdr(lo);
        /* Is there an outstanding layoutreturn ? */
        if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
                spin_unlock(&ino->i_lock);
                if (wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
                                        TASK_UNINTERRUPTIBLE))
                        goto out_put_layout_hdr;
                spin_lock(&ino->i_lock);
        }
        valid_layout = pnfs_layout_is_valid(lo);
        pnfs_clear_layoutcommit(ino, &tmp_list);
        pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0);

        if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
                struct pnfs_layout_range range = {
                        .iomode         = IOMODE_ANY,
                        .offset         = 0,
                        .length         = NFS4_MAX_UINT64,
                };
                NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
        }

        /* Don't send a LAYOUTRETURN if list was initially empty */
        if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) ||
                        !valid_layout) {
                spin_unlock(&ino->i_lock);
                dprintk("NFS: %s no layout segments to return\n", __func__);
                goto out_put_layout_hdr;
        }

        send = pnfs_prepare_layoutreturn(lo, &stateid, NULL);
        spin_unlock(&ino->i_lock);
        if (send)
                status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
out_put_layout_hdr:
        pnfs_free_lseg_list(&tmp_list);
        pnfs_put_layout_hdr(lo);
out:
        dprintk("<-- %s status: %d\n", __func__, status);
        return status;
}

int
pnfs_commit_and_return_layout(struct inode *inode)
{
        struct pnfs_layout_hdr *lo;
        int ret;

        spin_lock(&inode->i_lock);
        lo = NFS_I(inode)->layout;
        if (lo == NULL) {
                spin_unlock(&inode->i_lock);
                return 0;
        }
        pnfs_get_layout_hdr(lo);
        /* Block new layoutgets and read/write to ds */
        lo->plh_block_lgets++;
        spin_unlock(&inode->i_lock);
        filemap_fdatawait(inode->i_mapping);
        ret = pnfs_layoutcommit_inode(inode, true);
        if (ret == 0)
                ret = _pnfs_return_layout(inode);
        spin_lock(&inode->i_lock);
        lo->plh_block_lgets--;
        spin_unlock(&inode->i_lock);
        pnfs_put_layout_hdr(lo);
        return ret;
}

bool pnfs_roc(struct inode *ino,
                struct nfs4_layoutreturn_args *args,
                struct nfs4_layoutreturn_res *res,
                const struct rpc_cred *cred)
{
        struct nfs_inode *nfsi = NFS_I(ino);
        struct nfs_open_context *ctx;
        struct nfs4_state *state;
        struct pnfs_layout_hdr *lo;
        struct pnfs_layout_segment *lseg, *next;
        nfs4_stateid stateid;
        enum pnfs_iomode iomode = 0;
        bool layoutreturn = false, roc = false;
        bool skip_read = false;

        if (!nfs_have_layout(ino))
                return false;
retry:
        spin_lock(&ino->i_lock);
        lo = nfsi->layout;
        if (!lo || !pnfs_layout_is_valid(lo) ||
            test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
                lo = NULL;
                goto out_noroc;
        }
        pnfs_get_layout_hdr(lo);
        if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
                spin_unlock(&ino->i_lock);
                wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
                                TASK_UNINTERRUPTIBLE);
                pnfs_put_layout_hdr(lo);
                goto retry;
        }

        /* no roc if we hold a delegation */
        if (nfs4_check_delegation(ino, FMODE_READ)) {
                if (nfs4_check_delegation(ino, FMODE_WRITE))
                        goto out_noroc;
                skip_read = true;
        }

        list_for_each_entry(ctx, &nfsi->open_files, list) {
                state = ctx->state;
                if (state == NULL)
                        continue;
                /* Don't return layout if there is open file state */
                if (state->state & FMODE_WRITE)
                        goto out_noroc;
                if (state->state & FMODE_READ)
                        skip_read = true;
        }

        list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) {
                if (skip_read && lseg->pls_range.iomode == IOMODE_READ)
                        continue;
                /* If we are sending layoutreturn, invalidate all valid lsegs */
                if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags))
                        continue;
                /*
                 * Note: mark lseg for return so pnfs_layout_remove_lseg
                 * doesn't invalidate the layout for us.
                 */
                set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
                if (!mark_lseg_invalid(lseg, &lo->plh_return_segs))
                        continue;
                pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
        }

        if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
                goto out_noroc;

        /* ROC in two conditions:
         * 1. there are ROC lsegs
         * 2. we don't send layoutreturn
         */
        /* lo ref dropped in pnfs_roc_release() */
        layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
        /* If the creds don't match, we can't compound the layoutreturn */
        if (!layoutreturn || cred != lo->plh_lc_cred)
                goto out_noroc;

        roc = layoutreturn;
        pnfs_init_layoutreturn_args(args, lo, &stateid, iomode);
        res->lrs_present = 0;
        layoutreturn = false;

out_noroc:
        spin_unlock(&ino->i_lock);
        pnfs_layoutcommit_inode(ino, true);
        if (roc) {
                struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
                if (ld->prepare_layoutreturn)
                        ld->prepare_layoutreturn(args);
                pnfs_put_layout_hdr(lo);
                return true;
        }
        if (layoutreturn)
                pnfs_send_layoutreturn(lo, &stateid, iomode, true);
        pnfs_put_layout_hdr(lo);
        return false;
}

void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
                struct nfs4_layoutreturn_res *res,
                int ret)
{
        struct pnfs_layout_hdr *lo = args->layout;
        const nfs4_stateid *arg_stateid = NULL;
        const nfs4_stateid *res_stateid = NULL;
        struct nfs4_xdr_opaque_data *ld_private = args->ld_private;

        if (ret == 0) {
                arg_stateid = &args->stateid;
                if (res->lrs_present)
                        res_stateid = &res->stateid;
        }
        pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range,
                        res_stateid);
        if (ld_private && ld_private->ops && ld_private->ops->free)
                ld_private->ops->free(ld_private);
        pnfs_put_layout_hdr(lo);
        trace_nfs4_layoutreturn_on_close(args->inode, 0);
}

bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
{
        struct nfs_inode *nfsi = NFS_I(ino);
        struct pnfs_layout_hdr *lo;
        bool sleep = false;

        /* We might not have grabbed the lo reference, so we need to
         * check under the i_lock */
        spin_lock(&ino->i_lock);
        lo = nfsi->layout;
        if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
                rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
                sleep = true;
        }
        spin_unlock(&ino->i_lock);
        return sleep;
}

/*
 * Compare two layout segments for sorting into layout cache.
 * We want to preferentially return RW over RO layouts, so ensure those
 * are seen first.
 */
static s64
pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1,
           const struct pnfs_layout_range *l2)
{
        s64 d;

        /* high offset > low offset */
        d = l1->offset - l2->offset;
        if (d)
                return d;

        /* short length > long length */
        d = l2->length - l1->length;
        if (d)
                return d;

        /* read > read/write */
        return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
}

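/*
 * Illustrative consequence: for two ranges with equal offset and
 * length, a READ lseg compares greater than an RW lseg, so
 * pnfs_lseg_range_is_after() sorts the RW segment first in the cache,
 * as the comment above intends.
 */
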
static bool
pnfs_lseg_range_is_after(const struct pnfs_layout_range *l1,
                const struct pnfs_layout_range *l2)
{
        return pnfs_lseg_range_cmp(l1, l2) > 0;
}

static bool
pnfs_lseg_no_merge(struct pnfs_layout_segment *lseg,
                struct pnfs_layout_segment *old)
{
        return false;
}

void
pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo,
                   struct pnfs_layout_segment *lseg,
                   bool (*is_after)(const struct pnfs_layout_range *,
                           const struct pnfs_layout_range *),
                   bool (*do_merge)(struct pnfs_layout_segment *,
                           struct pnfs_layout_segment *),
                   struct list_head *free_me)
{
        struct pnfs_layout_segment *lp, *tmp;

        dprintk("%s:Begin\n", __func__);

        list_for_each_entry_safe(lp, tmp, &lo->plh_segs, pls_list) {
                if (test_bit(NFS_LSEG_VALID, &lp->pls_flags) == 0)
                        continue;
                if (do_merge(lseg, lp)) {
                        mark_lseg_invalid(lp, free_me);
                        continue;
                }
                if (is_after(&lseg->pls_range, &lp->pls_range))
                        continue;
                list_add_tail(&lseg->pls_list, &lp->pls_list);
                dprintk("%s: inserted lseg %p "
                        "iomode %d offset %llu length %llu before "
                        "lp %p iomode %d offset %llu length %llu\n",
                        __func__, lseg, lseg->pls_range.iomode,
                        lseg->pls_range.offset, lseg->pls_range.length,
                        lp, lp->pls_range.iomode, lp->pls_range.offset,
                        lp->pls_range.length);
                goto out;
        }
        list_add_tail(&lseg->pls_list, &lo->plh_segs);
        dprintk("%s: inserted lseg %p "
                "iomode %d offset %llu length %llu at tail\n",
                __func__, lseg, lseg->pls_range.iomode,
                lseg->pls_range.offset, lseg->pls_range.length);
out:
        pnfs_get_layout_hdr(lo);

        dprintk("%s:Return\n", __func__);
}
EXPORT_SYMBOL_GPL(pnfs_generic_layout_insert_lseg);

static void
pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,
                   struct pnfs_layout_segment *lseg,
                   struct list_head *free_me)
{
        struct inode *inode = lo->plh_inode;
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;

        if (ld->add_lseg != NULL)
                ld->add_lseg(lo, lseg, free_me);
        else
                pnfs_generic_layout_insert_lseg(lo, lseg,
                                pnfs_lseg_range_is_after,
                                pnfs_lseg_no_merge,
                                free_me);
}

static struct pnfs_layout_hdr *
alloc_init_layout_hdr(struct inode *ino,
                      struct nfs_open_context *ctx,
                      gfp_t gfp_flags)
{
        struct pnfs_layout_hdr *lo;

        lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
        if (!lo)
                return NULL;
        refcount_set(&lo->plh_refcount, 1);
        INIT_LIST_HEAD(&lo->plh_layouts);
        INIT_LIST_HEAD(&lo->plh_segs);
        INIT_LIST_HEAD(&lo->plh_return_segs);
        INIT_LIST_HEAD(&lo->plh_bulk_destroy);
        lo->plh_inode = ino;
        lo->plh_lc_cred = get_rpccred(ctx->cred);
        lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID;
        return lo;
}

static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino,
                       struct nfs_open_context *ctx,
                       gfp_t gfp_flags)
        __releases(&ino->i_lock)
        __acquires(&ino->i_lock)
{
        struct nfs_inode *nfsi = NFS_I(ino);
        struct pnfs_layout_hdr *new = NULL;

        dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);

        if (nfsi->layout != NULL)
                goto out_existing;
        spin_unlock(&ino->i_lock);
        new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
        spin_lock(&ino->i_lock);

        if (likely(nfsi->layout == NULL)) {     /* Won the race? */
                nfsi->layout = new;
                return new;
        } else if (new != NULL)
                pnfs_free_layout_hdr(new);
out_existing:
        pnfs_get_layout_hdr(nfsi->layout);
        return nfsi->layout;
}

1587 /*
1588  * iomode matching rules:
1589  * requested    cached  strict match
1590  * iomode       iomode  iomode
1591  * ---------    ------  ------ -----
1592  * ANY          READ    N/A    true
1593  * ANY          RW      N/A    true
1594  * RW           READ    N/A    false
1595  * RW           RW      N/A    true
1596  * READ         READ    N/A    true
1597  * READ         RW      true   false
1598  * READ         RW      false  true
1599  */
1600 static bool
1601 pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range,
1602                  const struct pnfs_layout_range *range,
1603                  bool strict_iomode)
1604 {
1605         struct pnfs_layout_range range1;
1606
1607         if ((range->iomode == IOMODE_RW &&
1608              ls_range->iomode != IOMODE_RW) ||
1609             (range->iomode != ls_range->iomode &&
1610              strict_iomode) ||
1611             !pnfs_lseg_range_intersecting(ls_range, range))
1612                 return false;
1613
1614         /* range1 covers only the first byte in the range */
1615         range1 = *range;
1616         range1.length = 1;
1617         return pnfs_lseg_range_contained(ls_range, &range1);
1618 }
1619
1620 /*
1621  * lookup range in layout
1622  */
1623 static struct pnfs_layout_segment *
1624 pnfs_find_lseg(struct pnfs_layout_hdr *lo,
1625                 struct pnfs_layout_range *range,
1626                 bool strict_iomode)
1627 {
1628         struct pnfs_layout_segment *lseg, *ret = NULL;
1629
1630         dprintk("%s:Begin\n", __func__);
1631
1632         list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
1633                 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
1634                     !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) &&
1635                     pnfs_lseg_range_match(&lseg->pls_range, range,
1636                                           strict_iomode)) {
1637                         ret = pnfs_get_lseg(lseg);
1638                         break;
1639                 }
1640         }
1641
1642         dprintk("%s:Return lseg %p ref %d\n",
1643                 __func__, ret, ret ? refcount_read(&ret->pls_refcount) : 0);
1644         return ret;
1645 }
1646
1647 /*
1648  * Use mdsthreshold hints set at each OPEN to determine if I/O should go
1649  * to the MDS or over pNFS
1650  *
1651  * The nfs_inode read_io and write_io fields are cumulative counters reset
1652  * when there are no layout segments. Note that in pnfs_update_layout iomode
1653  * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
1654  * WRITE request.
1655  *
1656  * A return of true means use MDS I/O.
1657  *
1658  * From RFC 5661:
1659  * If a file's size is smaller than the file size threshold, data accesses
1660  * SHOULD be sent to the metadata server.  If an I/O request has a length that
1661  * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
1662  * server.  If both file size and I/O size are provided, the client SHOULD
1663  * reach or exceed both thresholds before sending its read or write
1664  * requests to the data server.
1665  */
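/*
 * Example (illustrative values): if the server set THRESHOLD_RD with
 * rd_sz = 65536 and THRESHOLD_RD_IO with rd_io_sz = 1048576, a READ of
 * a 4096-byte file when less than 1 MiB has been read so far satisfies
 * both thresholds, so this returns true and the read goes to the MDS.
 */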
1666 static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
1667                                      struct inode *ino, int iomode)
1668 {
1669         struct nfs4_threshold *t = ctx->mdsthreshold;
1670         struct nfs_inode *nfsi = NFS_I(ino);
1671         loff_t fsize = i_size_read(ino);
1672         bool size = false, size_set = false, io = false, io_set = false, ret = false;
1673
1674         if (t == NULL)
1675                 return ret;
1676
1677         dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
1678                 __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);
1679
1680         switch (iomode) {
1681         case IOMODE_READ:
1682                 if (t->bm & THRESHOLD_RD) {
1683                         dprintk("%s fsize %llu\n", __func__, fsize);
1684                         size_set = true;
1685                         if (fsize < t->rd_sz)
1686                                 size = true;
1687                 }
1688                 if (t->bm & THRESHOLD_RD_IO) {
1689                         dprintk("%s nfsi->read_io %llu\n", __func__,
1690                                 nfsi->read_io);
1691                         io_set = true;
1692                         if (nfsi->read_io < t->rd_io_sz)
1693                                 io = true;
1694                 }
1695                 break;
1696         case IOMODE_RW:
1697                 if (t->bm & THRESHOLD_WR) {
1698                         dprintk("%s fsize %llu\n", __func__, fsize);
1699                         size_set = true;
1700                         if (fsize < t->wr_sz)
1701                                 size = true;
1702                 }
1703                 if (t->bm & THRESHOLD_WR_IO) {
1704                         dprintk("%s nfsi->write_io %llu\n", __func__,
1705                                 nfsi->write_io);
1706                         io_set = true;
1707                         if (nfsi->write_io < t->wr_io_sz)
1708                                 io = true;
1709                 }
1710                 break;
1711         }
1712         if (size_set && io_set) {
1713                 if (size && io)
1714                         ret = true;
1715         } else if (size || io)
1716                 ret = true;
1717
1718         dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
1719         return ret;
1720 }
1721
1722 static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
1723 {
1724         /*
1725          * send layoutcommit as it can hold up layoutreturn due to lseg
1726          * reference
1727          */
1728         pnfs_layoutcommit_inode(lo->plh_inode, false);
1729         return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
1730                                    nfs_wait_bit_killable,
1731                                    TASK_UNINTERRUPTIBLE);
1732 }
1733
1734 static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo)
1735 {
1736         unsigned long *bitlock = &lo->plh_flags;
1737
1738         clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock);
1739         smp_mb__after_atomic();
1740         wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET);
1741 }
1742
1743 static void _add_to_server_list(struct pnfs_layout_hdr *lo,
1744                                 struct nfs_server *server)
1745 {
1746         if (list_empty(&lo->plh_layouts)) {
1747                 struct nfs_client *clp = server->nfs_client;
1748
1749                 /* The lo must be on the clp list if there is any
1750                  * chance of a CB_LAYOUTRECALL(FILE) coming in.
1751                  */
1752                 spin_lock(&clp->cl_lock);
1753                 if (list_empty(&lo->plh_layouts))
1754                         list_add_tail(&lo->plh_layouts, &server->layouts);
1755                 spin_unlock(&clp->cl_lock);
1756         }
1757 }
1758
1759 /*
1760  * The layout segment is retrieved from the server if not cached.
1761  * The appropriate layout segment is referenced and returned to the caller.
1762  */
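/*
 * A minimal caller sketch (illustrative, error handling elided):
 *
 *	lseg = pnfs_update_layout(inode, ctx, pos, count,
 *				  IOMODE_READ, false, GFP_KERNEL);
 *	if (IS_ERR(lseg))
 *		return PTR_ERR(lseg);
 *	if (lseg == NULL)
 *		... fall back to I/O through the MDS ...
 */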
1763 struct pnfs_layout_segment *
1764 pnfs_update_layout(struct inode *ino,
1765                    struct nfs_open_context *ctx,
1766                    loff_t pos,
1767                    u64 count,
1768                    enum pnfs_iomode iomode,
1769                    bool strict_iomode,
1770                    gfp_t gfp_flags)
1771 {
1772         struct pnfs_layout_range arg = {
1773                 .iomode = iomode,
1774                 .offset = pos,
1775                 .length = count,
1776         };
1777         unsigned pg_offset;
1778         struct nfs_server *server = NFS_SERVER(ino);
1779         struct nfs_client *clp = server->nfs_client;
1780         struct pnfs_layout_hdr *lo = NULL;
1781         struct pnfs_layout_segment *lseg = NULL;
1782         struct nfs4_layoutget *lgp;
1783         nfs4_stateid stateid;
1784         long timeout = 0;
1785         unsigned long giveup = jiffies + (clp->cl_lease_time << 1);
1786         bool first;
1787
1788         if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
1789                 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
1790                                  PNFS_UPDATE_LAYOUT_NO_PNFS);
1791                 goto out;
1792         }
1793
1794         if (iomode == IOMODE_READ && i_size_read(ino) == 0) {
1795                 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
1796                                  PNFS_UPDATE_LAYOUT_RD_ZEROLEN);
1797                 goto out;
1798         }
1799
1800         if (pnfs_within_mdsthreshold(ctx, ino, iomode)) {
1801                 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
1802                                  PNFS_UPDATE_LAYOUT_MDSTHRESH);
1803                 goto out;
1804         }
1805
1806 lookup_again:
1807         nfs4_client_recover_expired_lease(clp);
1808         first = false;
1809         spin_lock(&ino->i_lock);
1810         lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
1811         if (lo == NULL) {
1812                 spin_unlock(&ino->i_lock);
1813                 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
1814                                  PNFS_UPDATE_LAYOUT_NOMEM);
1815                 goto out;
1816         }
1817
1818         /* Do we even need to bother with this? */
1819         if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
1820                 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
1821                                  PNFS_UPDATE_LAYOUT_BULK_RECALL);
1822                 dprintk("%s matches recall, use MDS\n", __func__);
1823                 goto out_unlock;
1824         }
1825
1826         /* if LAYOUTGET already failed once we don't try again */
1827         if (pnfs_layout_io_test_failed(lo, iomode)) {
1828                 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
1829                                  PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
1830                 goto out_unlock;
1831         }
1832
1833         lseg = pnfs_find_lseg(lo, &arg, strict_iomode);
1834         if (lseg) {
1835                 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
1836                                 PNFS_UPDATE_LAYOUT_FOUND_CACHED);
1837                 goto out_unlock;
1838         }
1839
1840         if (!nfs4_valid_open_stateid(ctx->state)) {
1841                 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
1842                                 PNFS_UPDATE_LAYOUT_INVALID_OPEN);
1843                 goto out_unlock;
1844         }
1845
1846         /*
1847          * Choose a stateid for the LAYOUTGET. If we don't have a layout
1848          * stateid, or it has been invalidated, then we must use the open
1849          * stateid.
1850          */
1851         if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {
1853                 /*
1854                  * The first layoutget for the file. Need to serialize per
1855                  * RFC 5661 Errata 3208.
1856                  */
1857                 if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET,
1858                                      &lo->plh_flags)) {
1859                         spin_unlock(&ino->i_lock);
1860                         wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET,
1861                                     TASK_UNINTERRUPTIBLE);
1862                         pnfs_put_layout_hdr(lo);
1863                         dprintk("%s retrying\n", __func__);
1864                         goto lookup_again;
1865                 }
1866
1867                 first = true;
1868                 if (nfs4_select_rw_stateid(ctx->state,
1869                                         iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ,
1870                                         NULL, &stateid, NULL) != 0) {
1871                         trace_pnfs_update_layout(ino, pos, count,
1872                                         iomode, lo, lseg,
1873                                         PNFS_UPDATE_LAYOUT_INVALID_OPEN);
1874                         goto out_unlock;
1875                 }
1876         } else {
1877                 nfs4_stateid_copy(&stateid, &lo->plh_stateid);
1878         }
1879
1880         /*
1881          * Because we free lsegs before sending LAYOUTRETURN, we need to wait
1882          * for LAYOUTRETURN even if first is true.
1883          */
1884         if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
1885                 spin_unlock(&ino->i_lock);
1886                 dprintk("%s wait for layoutreturn\n", __func__);
1887                 if (pnfs_prepare_to_retry_layoutget(lo)) {
1888                         if (first)
1889                                 pnfs_clear_first_layoutget(lo);
1890                         pnfs_put_layout_hdr(lo);
1891                         dprintk("%s retrying\n", __func__);
1892                         trace_pnfs_update_layout(ino, pos, count, iomode, lo,
1893                                         lseg, PNFS_UPDATE_LAYOUT_RETRY);
1894                         goto lookup_again;
1895                 }
1896                 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
1897                                 PNFS_UPDATE_LAYOUT_RETURN);
1898                 goto out_put_layout_hdr;
1899         }
1900
1901         if (pnfs_layoutgets_blocked(lo)) {
1902                 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
1903                                 PNFS_UPDATE_LAYOUT_BLOCKED);
1904                 goto out_unlock;
1905         }
1906         atomic_inc(&lo->plh_outstanding);
1907         spin_unlock(&ino->i_lock);
1908
1909         _add_to_server_list(lo, server);
1910
1911         pg_offset = arg.offset & ~PAGE_MASK;
1912         if (pg_offset) {
1913                 arg.offset -= pg_offset;
1914                 arg.length += pg_offset;
1915         }
1916         if (arg.length != NFS4_MAX_UINT64)
1917                 arg.length = PAGE_ALIGN(arg.length);
1918
1919         lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags);
1920         if (!lgp) {
1921                 trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL,
1922                                          PNFS_UPDATE_LAYOUT_NOMEM);
1923                 atomic_dec(&lo->plh_outstanding);
1924                 goto out_put_layout_hdr;
1925         }
1926
1927         lseg = nfs4_proc_layoutget(lgp, &timeout);
1928         trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
1929                                  PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
1930         atomic_dec(&lo->plh_outstanding);
1931         if (IS_ERR(lseg)) {
1932                 switch (PTR_ERR(lseg)) {
1933                 case -EBUSY:
1934                         if (time_after(jiffies, giveup))
1935                                 lseg = NULL;
1936                         break;
1937                 case -ERECALLCONFLICT:
1938                         /* Huh? We hold no layouts, how is there a recall? */
1939                         if (first) {
1940                                 lseg = NULL;
1941                                 break;
1942                         }
1943                         /* Destroy the existing layout and start over */
1944                         if (time_after(jiffies, giveup))
1945                                 pnfs_destroy_layout(NFS_I(ino));
1946                         /* Fallthrough */
1947                 case -EAGAIN:
1948                         break;
1949                 default:
1950                         if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
1951                                 pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
1952                                 lseg = NULL;
1953                         }
1954                         goto out_put_layout_hdr;
1955                 }
1956                 if (lseg) {
1957                         if (first)
1958                                 pnfs_clear_first_layoutget(lo);
1959                         trace_pnfs_update_layout(ino, pos, count,
1960                                 iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
1961                         pnfs_put_layout_hdr(lo);
1962                         goto lookup_again;
1963                 }
1964         } else {
1965                 pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
1966         }
1967
1968 out_put_layout_hdr:
1969         if (first)
1970                 pnfs_clear_first_layoutget(lo);
1971         pnfs_put_layout_hdr(lo);
1972 out:
1973         dprintk("%s: inode %s/%llu pNFS layout segment %s for "
1974                         "(%s, offset: %llu, length: %llu)\n",
1975                         __func__, ino->i_sb->s_id,
1976                         (unsigned long long)NFS_FILEID(ino),
1977                         IS_ERR_OR_NULL(lseg) ? "not found" : "found",
1978                         iomode == IOMODE_RW ? "read/write" : "read-only",
1979                         (unsigned long long)pos,
1980                         (unsigned long long)count);
1981         return lseg;
1982 out_unlock:
1983         spin_unlock(&ino->i_lock);
1984         goto out_put_layout_hdr;
1985 }
1986 EXPORT_SYMBOL_GPL(pnfs_update_layout);
1987
1988 static bool
1989 pnfs_sanity_check_layout_range(struct pnfs_layout_range *range)
1990 {
1991         switch (range->iomode) {
1992         case IOMODE_READ:
1993         case IOMODE_RW:
1994                 break;
1995         default:
1996                 return false;
1997         }
1998         if (range->offset == NFS4_MAX_UINT64)
1999                 return false;
2000         if (range->length == 0)
2001                 return false;
2002         if (range->length != NFS4_MAX_UINT64 &&
2003             range->length > NFS4_MAX_UINT64 - range->offset)
2004                 return false;
2005         return true;
2006 }
2007
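/*
 * Grab a layout header that does not yet have a valid layout stateid
 * and reserve the right to send the first LAYOUTGET for it (the
 * NFS_LAYOUT_FIRST_LAYOUTGET bit plus an outstanding count).  Returns
 * NULL if the layout already has a stateid, is being returned, or if
 * layoutgets are currently blocked.
 */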
2008 static struct pnfs_layout_hdr *
2009 _pnfs_grab_empty_layout(struct inode *ino, struct nfs_open_context *ctx)
2010 {
2011         struct pnfs_layout_hdr *lo;
2012
2013         spin_lock(&ino->i_lock);
2014         lo = pnfs_find_alloc_layout(ino, ctx, GFP_KERNEL);
2015         if (!lo)
2016                 goto out_unlock;
2017         if (!test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags))
2018                 goto out_unlock;
2019         if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
2020                 goto out_unlock;
2021         if (pnfs_layoutgets_blocked(lo))
2022                 goto out_unlock;
2023         if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags))
2024                 goto out_unlock;
2025         atomic_inc(&lo->plh_outstanding);
2026         spin_unlock(&ino->i_lock);
2027         _add_to_server_list(lo, NFS_SERVER(ino));
2028         return lo;
2029
2030 out_unlock:
2031         spin_unlock(&ino->i_lock);
2032         pnfs_put_layout_hdr(lo);
2033         return NULL;
2034 }
2035
2036 extern const nfs4_stateid current_stateid;
2037
2038 static void _lgopen_prepare_attached(struct nfs4_opendata *data,
2039                                      struct nfs_open_context *ctx)
2040 {
2041         struct inode *ino = data->dentry->d_inode;
2042         struct pnfs_layout_range rng = {
2043                 .iomode = (data->o_arg.fmode & FMODE_WRITE) ?
2044                           IOMODE_RW: IOMODE_READ,
2045                 .offset = 0,
2046                 .length = NFS4_MAX_UINT64,
2047         };
2048         struct nfs4_layoutget *lgp;
2049         struct pnfs_layout_hdr *lo;
2050
2051         /* Heuristic: don't send layoutget if we have cached data */
2052         if (rng.iomode == IOMODE_READ &&
2053            (i_size_read(ino) == 0 || ino->i_mapping->nrpages != 0))
2054                 return;
2055
2056         lo = _pnfs_grab_empty_layout(ino, ctx);
2057         if (!lo)
2058                 return;
2059         lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid,
2060                                              &rng, GFP_KERNEL);
2061         if (!lgp) {
2062                 pnfs_clear_first_layoutget(lo);
2063                 pnfs_put_layout_hdr(lo);
2064                 return;
2065         }
2066         data->lgp = lgp;
2067         data->o_arg.lg_args = &lgp->args;
2068         data->o_res.lg_res = &lgp->res;
2069 }
2070
2071 static void _lgopen_prepare_floating(struct nfs4_opendata *data,
2072                                      struct nfs_open_context *ctx)
2073 {
2074         struct pnfs_layout_range rng = {
2075                 .iomode = (data->o_arg.fmode & FMODE_WRITE) ?
2076                           IOMODE_RW: IOMODE_READ,
2077                 .offset = 0,
2078                 .length = NFS4_MAX_UINT64,
2079         };
2080         struct nfs4_layoutget *lgp;
2081
2082         lgp = pnfs_alloc_init_layoutget_args(NULL, ctx, &current_stateid,
2083                                              &rng, GFP_KERNEL);
2084         if (!lgp)
2085                 return;
2086         data->lgp = lgp;
2087         data->o_arg.lg_args = &lgp->args;
2088         data->o_res.lg_res = &lgp->res;
2089 }
2090
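/*
 * Attach LAYOUTGET arguments to an OPEN compound so that the open state
 * and the layout can be obtained in a single round trip (the
 * PNFS_LAYOUTGET_ON_OPEN optimization).
 */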
2091 void pnfs_lgopen_prepare(struct nfs4_opendata *data,
2092                          struct nfs_open_context *ctx)
2093 {
2094         struct nfs_server *server = NFS_SERVER(data->dir->d_inode);
2095
2096         if (!(pnfs_enabled_sb(server) &&
2097               server->pnfs_curr_ld->flags & PNFS_LAYOUTGET_ON_OPEN))
2098                 return;
2099         /* Could check on max_ops, but currently hardcoded high enough */
2100         if (!nfs_server_capable(data->dir->d_inode, NFS_CAP_LGOPEN))
2101                 return;
2102         if (data->state)
2103                 _lgopen_prepare_attached(data, ctx);
2104         else
2105                 _lgopen_prepare_floating(data, ctx);
2106 }
2107
2108 void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp,
2109                        struct nfs_open_context *ctx)
2110 {
2111         struct pnfs_layout_hdr *lo;
2112         struct pnfs_layout_segment *lseg;
2113         struct nfs_server *srv = NFS_SERVER(ino);
2114         u32 iomode;
2115
2116         if (!lgp)
2117                 return;
2118         dprintk("%s: entered with status %i\n", __func__, lgp->res.status);
2119         if (lgp->res.status) {
2120                 switch (lgp->res.status) {
2121                 default:
2122                         break;
2123                 /*
2124                  * Halt lgopen attempts if the server doesn't recognise
2125                  * the "current stateid" value, the layout type, or the
2126                  * layoutget operation as being valid.
2127                  * Also halt if it complains about too many ops in the
2128                  * compound or about the request/reply being too big.
2129                  */
2130                 case -NFS4ERR_BAD_STATEID:
2131                 case -NFS4ERR_NOTSUPP:
2132                 case -NFS4ERR_REP_TOO_BIG:
2133                 case -NFS4ERR_REP_TOO_BIG_TO_CACHE:
2134                 case -NFS4ERR_REQ_TOO_BIG:
2135                 case -NFS4ERR_TOO_MANY_OPS:
2136                 case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
2137                         srv->caps &= ~NFS_CAP_LGOPEN;
2138                 }
2139                 return;
2140         }
2141         if (!lgp->args.inode) {
2142                 lo = _pnfs_grab_empty_layout(ino, ctx);
2143                 if (!lo)
2144                         return;
2145                 lgp->args.inode = ino;
2146         } else
2147                 lo = NFS_I(lgp->args.inode)->layout;
2148
2149         if (read_seqcount_retry(&srv->nfs_client->cl_callback_count,
2150                                 lgp->callback_count))
2151                 return;
2152         lseg = pnfs_layout_process(lgp);
2153         if (!IS_ERR(lseg)) {
2154                 iomode = lgp->args.range.iomode;
2155                 pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
2156                 pnfs_put_lseg(lseg);
2157         }
2158 }
2159
2160 void nfs4_lgopen_release(struct nfs4_layoutget *lgp)
2161 {
2162         if (lgp != NULL) {
2163                 struct inode *inode = lgp->args.inode;
2164                 if (inode) {
2165                         struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
2166                         atomic_dec(&lo->plh_outstanding);
2167                         pnfs_clear_first_layoutget(lo);
2168                 }
2169                 pnfs_layoutget_free(lgp);
2170         }
2171 }
2172
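/*
 * pnfs_layout_process - instantiate a layout segment from a LAYOUTGET reply
 *
 * Asks the layout driver to decode the opaque layout body, then inserts
 * the resulting lseg into the layout cache under i_lock.  Returns a
 * referenced lseg on success, ERR_PTR(-EAGAIN) if the reply had to be
 * forgotten and the LAYOUTGET should be retried, or another ERR_PTR
 * value on failure.
 */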
2173 struct pnfs_layout_segment *
2174 pnfs_layout_process(struct nfs4_layoutget *lgp)
2175 {
2176         struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
2177         struct nfs4_layoutget_res *res = &lgp->res;
2178         struct pnfs_layout_segment *lseg;
2179         struct inode *ino = lo->plh_inode;
2180         LIST_HEAD(free_me);
2181
2182         if (!pnfs_sanity_check_layout_range(&res->range))
2183                 return ERR_PTR(-EINVAL);
2184
2185         /* Inject layout blob into I/O device driver */
2186         lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
2187         if (IS_ERR_OR_NULL(lseg)) {
2188                 if (!lseg)
2189                         lseg = ERR_PTR(-ENOMEM);
2190
2191                 dprintk("%s: Could not allocate layout: error %ld\n",
2192                        __func__, PTR_ERR(lseg));
2193                 return lseg;
2194         }
2195
2196         pnfs_init_lseg(lo, lseg, &res->range, &res->stateid);
2197
2198         spin_lock(&ino->i_lock);
2199         if (pnfs_layoutgets_blocked(lo)) {
2200                 dprintk("%s forget reply due to state\n", __func__);
2201                 goto out_forget;
2202         }
2203
2204         if (!pnfs_layout_is_valid(lo)) {
2205                 /* We have a completely new layout */
2206                 pnfs_set_layout_stateid(lo, &res->stateid, true);
2207         } else if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
2208                 /* existing state ID, make sure the sequence number matches. */
2209                 if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
2210                         dprintk("%s forget reply due to sequence\n", __func__);
2211                         goto out_forget;
2212                 }
2213                 pnfs_set_layout_stateid(lo, &res->stateid, false);
2214         } else {
2215                 /*
2216                  * We got an entirely new state ID.  Mark all segments for the
2217                  * inode invalid, and retry the layoutget
2218                  */
2219                 pnfs_mark_layout_stateid_invalid(lo, &free_me);
2220                 goto out_forget;
2221         }
2222
2223         pnfs_get_lseg(lseg);
2224         pnfs_layout_insert_lseg(lo, lseg, &free_me);
2225
2227         if (res->return_on_close)
2228                 set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
2229
2230         spin_unlock(&ino->i_lock);
2231         pnfs_free_lseg_list(&free_me);
2232         return lseg;
2233
2234 out_forget:
2235         spin_unlock(&ino->i_lock);
2236         lseg->pls_layout = lo;
2237         NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
2238         return ERR_PTR(-EAGAIN);
2239 }
2240
2241 /**
2242  * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
2243  * @lo: pointer to layout header
2244  * @tmp_list: list header to be used with pnfs_free_lseg_list()
2245  * @return_range: describe layout segment ranges to be returned
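 * @seq: stateid sequence number, used to skip segments newer than @seq
 *	 (a @seq of 0 matches any segment)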
2246  *
2247  * This function is mainly intended for use by layoutrecall. It attempts
2248  * to free the layout segment immediately, or else to mark it for return
2249  * as soon as its reference count drops to zero.
2250  */
2251 int
2252 pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
2253                                 struct list_head *tmp_list,
2254                                 const struct pnfs_layout_range *return_range,
2255                                 u32 seq)
2256 {
2257         struct pnfs_layout_segment *lseg, *next;
2258         int remaining = 0;
2259
2260         dprintk("%s:Begin lo %p\n", __func__, lo);
2261
2262         if (list_empty(&lo->plh_segs))
2263                 return 0;
2264
2265         assert_spin_locked(&lo->plh_inode->i_lock);
2266
2267         list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
2268                 if (pnfs_match_lseg_recall(lseg, return_range, seq)) {
2269                         dprintk("%s: marking lseg %p iomode %d "
2270                                 "offset %llu length %llu\n", __func__,
2271                                 lseg, lseg->pls_range.iomode,
2272                                 lseg->pls_range.offset,
2273                                 lseg->pls_range.length);
2274                         if (mark_lseg_invalid(lseg, tmp_list))
2275                                 continue;
2276                         remaining++;
2277                         set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
2278                 }
2279
2280         if (remaining)
2281                 pnfs_set_plh_return_info(lo, return_range->iomode, seq);
2282
2283         return remaining;
2284 }
2285
2286 void pnfs_error_mark_layout_for_return(struct inode *inode,
2287                                        struct pnfs_layout_segment *lseg)
2288 {
2289         struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
2290         struct pnfs_layout_range range = {
2291                 .iomode = lseg->pls_range.iomode,
2292                 .offset = 0,
2293                 .length = NFS4_MAX_UINT64,
2294         };
2295         bool return_now = false;
2296
2297         spin_lock(&inode->i_lock);
2298         if (!pnfs_layout_is_valid(lo)) {
2299                 spin_unlock(&inode->i_lock);
2300                 return;
2301         }
2302         pnfs_set_plh_return_info(lo, range.iomode, 0);
2303         /*
2304          * mark all matching lsegs so that we are sure to have no live
2305          * segments at hand when sending layoutreturn. See pnfs_put_lseg()
2306          * for how it works.
2307          */
2308         if (!pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, &range, 0)) {
2309                 nfs4_stateid stateid;
2310                 enum pnfs_iomode iomode;
2311
2312                 return_now = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
2313                 spin_unlock(&inode->i_lock);
2314                 if (return_now)
2315                         pnfs_send_layoutreturn(lo, &stateid, iomode, false);
2316         } else {
2317                 spin_unlock(&inode->i_lock);
2318                 nfs_commit_inode(inode, 0);
2319         }
2320 }
2321 EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
2322
2323 void
2324 pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
2325 {
2326         if (pgio->pg_lseg == NULL ||
2327             test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags))
2328                 return;
2329         pnfs_put_lseg(pgio->pg_lseg);
2330         pgio->pg_lseg = NULL;
2331 }
2332 EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
2333
2334 /*
2335  * Check for any intersection between the request and the pgio->pg_lseg,
2336  * and if none, put this pgio->pg_lseg away.
2337  */
2338 static void
2339 pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
2340 {
2341         if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) {
2342                 pnfs_put_lseg(pgio->pg_lseg);
2343                 pgio->pg_lseg = NULL;
2344         }
2345 }
2346
2347 void
2348 pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
2349 {
2350         u64 rd_size = req->wb_bytes;
2351
2352         pnfs_generic_pg_check_layout(pgio);
2353         pnfs_generic_pg_check_range(pgio, req);
2354         if (pgio->pg_lseg == NULL) {
2355                 if (pgio->pg_dreq == NULL)
2356                         rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
2357                 else
2358                         rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
2359
2360                 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
2361                                                    req->wb_context,
2362                                                    req_offset(req),
2363                                                    rd_size,
2364                                                    IOMODE_READ,
2365                                                    false,
2366                                                    GFP_KERNEL);
2367                 if (IS_ERR(pgio->pg_lseg)) {
2368                         pgio->pg_error = PTR_ERR(pgio->pg_lseg);
2369                         pgio->pg_lseg = NULL;
2370                         return;
2371                 }
2372         }
2373         /* If no lseg, fall back to read through mds */
2374         if (pgio->pg_lseg == NULL)
2375                 nfs_pageio_reset_read_mds(pgio);
2377 }
2378 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
2379
2380 void
2381 pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
2382                            struct nfs_page *req, u64 wb_size)
2383 {
2384         pnfs_generic_pg_check_layout(pgio);
2385         pnfs_generic_pg_check_range(pgio, req);
2386         if (pgio->pg_lseg == NULL) {
2387                 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
2388                                                    req->wb_context,
2389                                                    req_offset(req),
2390                                                    wb_size,
2391                                                    IOMODE_RW,
2392                                                    false,
2393                                                    GFP_NOFS);
2394                 if (IS_ERR(pgio->pg_lseg)) {
2395                         pgio->pg_error = PTR_ERR(pgio->pg_lseg);
2396                         pgio->pg_lseg = NULL;
2397                         return;
2398                 }
2399         }
2400         /* If no lseg, fall back to write through mds */
2401         if (pgio->pg_lseg == NULL)
2402                 nfs_pageio_reset_write_mds(pgio);
2403 }
2404 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
2405
2406 void
2407 pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc)
2408 {
2409         if (desc->pg_lseg) {
2410                 pnfs_put_lseg(desc->pg_lseg);
2411                 desc->pg_lseg = NULL;
2412         }
2413 }
2414 EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup);
2415
2416 /*
2417  * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
2418  * of bytes (maximum @req->wb_bytes) that can be coalesced.
2419  */
2420 size_t
2421 pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
2422                      struct nfs_page *prev, struct nfs_page *req)
2423 {
2424         unsigned int size;
2425         u64 seg_end, req_start, seg_left;
2426
2427         size = nfs_generic_pg_test(pgio, prev, req);
2428         if (!size)
2429                 return 0;
2430
2431         /*
2432          * 'size' contains the number of bytes left in the current page (up
2433          * to the original size asked for in @req->wb_bytes).
2434          *
2435          * Calculate how many bytes are left in the layout segment
2436          * and if there are less bytes than 'size', return that instead.
2437          *
2438          * Note that 'seg_end' is the offset of the first byte that lies
2439          * outside the pnfs_layout_range, as computed by pnfs_end_offset().
2441          */
2442         if (pgio->pg_lseg) {
2443                 seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset,
2444                                      pgio->pg_lseg->pls_range.length);
2445                 req_start = req_offset(req);
2446
2447                 /* start of request is past the last byte of this segment */
2448                 if (req_start >= seg_end)
2449                         return 0;
2450
2451                 /* adjust 'size' iff there are fewer bytes left in the
2452                  * segment than what nfs_generic_pg_test returned */
2453                 seg_left = seg_end - req_start;
2454                 if (seg_left < size)
2455                         size = (unsigned int)seg_left;
2456         }
2457
2458         return size;
2459 }
2460 EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
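
/*
 * Worked example (illustrative numbers): with a layout segment covering
 * offsets [0, 8192) and a request starting at offset 4096, seg_end is
 * 8192 and seg_left is 4096, so at most 4096 bytes are coalesced even
 * if nfs_generic_pg_test() would have allowed more.
 */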
2461
2462 int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
2463 {
2464         struct nfs_pageio_descriptor pgio;
2465
2466         /* Resend all requests through the MDS */
2467         nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
2468                               hdr->completion_ops);
2469         set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags);
2470         return nfs_pageio_resend(&pgio, hdr);
2471 }
2472 EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
2473
2474 static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
2475 {
2477         dprintk("pnfs write error = %d\n", hdr->pnfs_error);
2478         if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
2479             PNFS_LAYOUTRET_ON_ERROR) {
2480                 pnfs_return_layout(hdr->inode);
2481         }
2482         if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
2483                 hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
2484 }
2485
2486 /*
2487  * Called by non-RPC-based layout drivers
2488  */
2489 void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
2490 {
2491         if (likely(!hdr->pnfs_error)) {
2492                 pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
2493                                 hdr->mds_offset + hdr->res.count);
2494                 hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
2495         }
2496         trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
2497         if (unlikely(hdr->pnfs_error))
2498                 pnfs_ld_handle_write_error(hdr);
2499         hdr->mds_ops->rpc_release(hdr);
2500 }
2501 EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
2502
2503 static void
2504 pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
2505                 struct nfs_pgio_header *hdr)
2506 {
2507         struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
2508
2509         if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
2510                 list_splice_tail_init(&hdr->pages, &mirror->pg_list);
2511                 nfs_pageio_reset_write_mds(desc);
2512                 mirror->pg_recoalesce = 1;
2513         }
2514         hdr->completion_ops->completion(hdr);
2515 }
2516
2517 static enum pnfs_try_status
2518 pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
2519                         const struct rpc_call_ops *call_ops,
2520                         struct pnfs_layout_segment *lseg,
2521                         int how)
2522 {
2523         struct inode *inode = hdr->inode;
2524         enum pnfs_try_status trypnfs;
2525         struct nfs_server *nfss = NFS_SERVER(inode);
2526
2527         hdr->mds_ops = call_ops;
2528
2529         dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
2530                 inode->i_ino, hdr->args.count, hdr->args.offset, how);
2531         trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
2532         if (trypnfs != PNFS_NOT_ATTEMPTED)
2533                 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
2534         dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
2535         return trypnfs;
2536 }
2537
2538 static void
2539 pnfs_do_write(struct nfs_pageio_descriptor *desc,
2540               struct nfs_pgio_header *hdr, int how)
2541 {
2542         const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
2543         struct pnfs_layout_segment *lseg = desc->pg_lseg;
2544         enum pnfs_try_status trypnfs;
2545
2546         trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
2547         switch (trypnfs) {
2548         case PNFS_NOT_ATTEMPTED:
2549                 pnfs_write_through_mds(desc, hdr);
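                /* Fallthrough */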
2550         case PNFS_ATTEMPTED:
2551                 break;
2552         case PNFS_TRY_AGAIN:
2553                 /* cleanup hdr and prepare to redo pnfs */
2554                 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
2555                         struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
2556                         list_splice_init(&hdr->pages, &mirror->pg_list);
2557                         mirror->pg_recoalesce = 1;
2558                 }
2559                 hdr->mds_ops->rpc_release(hdr);
2560         }
2561 }
2562
2563 static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
2564 {
2565         pnfs_put_lseg(hdr->lseg);
2566         nfs_pgio_header_free(hdr);
2567 }
2568
2569 int
2570 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
2571 {
2572         struct nfs_pgio_header *hdr;
2573         int ret;
2574
2575         hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
2576         if (!hdr) {
2577                 desc->pg_error = -ENOMEM;
2578                 return desc->pg_error;
2579         }
2580         nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
2581
2582         hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
2583         ret = nfs_generic_pgio(desc, hdr);
2584         if (!ret)
2585                 pnfs_do_write(desc, hdr, desc->pg_ioflags);
2586
2587         return ret;
2588 }
2589 EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
2590
2591 int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
2592 {
2593         struct nfs_pageio_descriptor pgio;
2594
2595         /* Resend all requests through the MDS */
2596         nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
2597         return nfs_pageio_resend(&pgio, hdr);
2598 }
2599 EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
2600
2601 static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
2602 {
2603         dprintk("pnfs read error = %d\n", hdr->pnfs_error);
2604         if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
2605             PNFS_LAYOUTRET_ON_ERROR) {
2606                 pnfs_return_layout(hdr->inode);
2607         }
2608         if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
2609                 hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
2610 }
2611
2612 /*
2613  * Called by non-RPC-based layout drivers
2614  */
2615 void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
2616 {
2617         if (likely(!hdr->pnfs_error))
2618                 hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
2619         trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
2620         if (unlikely(hdr->pnfs_error))
2621                 pnfs_ld_handle_read_error(hdr);
2622         hdr->mds_ops->rpc_release(hdr);
2623 }
2624 EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
2625
2626 static void
2627 pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
2628                 struct nfs_pgio_header *hdr)
2629 {
2630         struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
2631
2632         if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
2633                 list_splice_tail_init(&hdr->pages, &mirror->pg_list);
2634                 nfs_pageio_reset_read_mds(desc);
2635                 mirror->pg_recoalesce = 1;
2636         }
2637         hdr->completion_ops->completion(hdr);
2638 }
2639
2640 /*
2641  * Call the appropriate parallel I/O subsystem read function.
2642  */
2643 static enum pnfs_try_status
2644 pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
2645                        const struct rpc_call_ops *call_ops,
2646                        struct pnfs_layout_segment *lseg)
2647 {
2648         struct inode *inode = hdr->inode;
2649         struct nfs_server *nfss = NFS_SERVER(inode);
2650         enum pnfs_try_status trypnfs;
2651
2652         hdr->mds_ops = call_ops;
2653
2654         dprintk("%s: Reading ino:%lu %u@%llu\n",
2655                 __func__, inode->i_ino, hdr->args.count, hdr->args.offset);
2656
2657         trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
2658         if (trypnfs != PNFS_NOT_ATTEMPTED)
2659                 nfs_inc_stats(inode, NFSIOS_PNFS_READ);
2660         dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
2661         return trypnfs;
2662 }
2663
2664 /* Resend all requests through pnfs. */
2665 void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr)
2666 {
2667         struct nfs_pageio_descriptor pgio;
2668
2669         if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
2670                 /* Prevent deadlocks with layoutreturn! */
2671                 pnfs_put_lseg(hdr->lseg);
2672                 hdr->lseg = NULL;
2673
2674                 nfs_pageio_init_read(&pgio, hdr->inode, false,
2675                                         hdr->completion_ops);
2676                 hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr);
2677         }
2678 }
2679 EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs);
2680
2681 static void
2682 pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
2683 {
2684         const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
2685         struct pnfs_layout_segment *lseg = desc->pg_lseg;
2686         enum pnfs_try_status trypnfs;
2687
2688         trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
2689         switch (trypnfs) {
2690         case PNFS_NOT_ATTEMPTED:
2691                 pnfs_read_through_mds(desc, hdr);
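                /* Fallthrough */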
2692         case PNFS_ATTEMPTED:
2693                 break;
2694         case PNFS_TRY_AGAIN:
2695                 /* cleanup hdr and prepare to redo pnfs */
2696                 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
2697                         struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
2698                         list_splice_init(&hdr->pages, &mirror->pg_list);
2699                         mirror->pg_recoalesce = 1;
2700                 }
2701                 hdr->mds_ops->rpc_release(hdr);
2702         }
2703 }
2704
2705 static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
2706 {
2707         pnfs_put_lseg(hdr->lseg);
2708         nfs_pgio_header_free(hdr);
2709 }
2710
2711 int
2712 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
2713 {
2714         struct nfs_pgio_header *hdr;
2715         int ret;
2716
2717         hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
2718         if (!hdr) {
2719                 desc->pg_error = -ENOMEM;
2720                 return desc->pg_error;
2721         }
2722         nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
2723         hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
2724         ret = nfs_generic_pgio(desc, hdr);
2725         if (!ret)
2726                 pnfs_do_read(desc, hdr);
2727         return ret;
2728 }
2729 EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
2730
2731 static void pnfs_clear_layoutcommitting(struct inode *inode)
2732 {
2733         unsigned long *bitlock = &NFS_I(inode)->flags;
2734
2735         clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
2736         smp_mb__after_atomic();
2737         wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
2738 }
2739
2740 /*
2741  * There can be multiple RW segments.
2742  */
2743 static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
2744 {
2745         struct pnfs_layout_segment *lseg;
2746
2747         list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
2748                 if (lseg->pls_range.iomode == IOMODE_RW &&
2749                     test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
2750                         list_add(&lseg->pls_lc_list, listp);
2751         }
2752 }
2753
2754 static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
2755 {
2756         struct pnfs_layout_segment *lseg, *tmp;
2757
2758         /* Matched by references in pnfs_set_layoutcommit */
2759         list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
2760                 list_del_init(&lseg->pls_lc_list);
2761                 pnfs_put_lseg(lseg);
2762         }
2763
2764         pnfs_clear_layoutcommitting(inode);
2765 }
2766
2767 void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
2768 {
2769         pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
2770 }
2771 EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
2772
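/*
 * Record that @lseg has dirtied the file through @end_pos so that a
 * later LAYOUTCOMMIT can tell the server about it: track the last write
 * byte in plh_lwb, take a reference on the lseg, and mark the inode
 * dirty the first time so ->write_inode will send the commit.
 */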
2773 void
2774 pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg,
2775                 loff_t end_pos)
2776 {
2777         struct nfs_inode *nfsi = NFS_I(inode);
2778         bool mark_as_dirty = false;
2779
2780         spin_lock(&inode->i_lock);
2781         if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
2782                 nfsi->layout->plh_lwb = end_pos;
2783                 mark_as_dirty = true;
2784                 dprintk("%s: Set layoutcommit for inode %lu ",
2785                         __func__, inode->i_ino);
2786         } else if (end_pos > nfsi->layout->plh_lwb)
2787                 nfsi->layout->plh_lwb = end_pos;
2788         if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) {
2789                 /* references matched in nfs4_layoutcommit_release */
2790                 pnfs_get_lseg(lseg);
2791         }
2792         spin_unlock(&inode->i_lock);
2793         dprintk("%s: lseg %p end_pos %llu\n",
2794                 __func__, lseg, nfsi->layout->plh_lwb);
2795
2796         /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
2797          * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
2798         if (mark_as_dirty)
2799                 mark_inode_dirty_sync(inode);
2800 }
2801 EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
2802
2803 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
2804 {
2805         struct nfs_server *nfss = NFS_SERVER(data->args.inode);
2806
2807         if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
2808                 nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
2809         pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
2810 }
2811
2812 /*
2813  * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
2814  * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
2815  * data to disk to allow the server to recover the data if it crashes.
2816  * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
2817  * is off and either a COMMIT is sent to a data server or WRITEs to a
2818  * data server return NFS_DATA_SYNC.
2819  */
2820 int
2821 pnfs_layoutcommit_inode(struct inode *inode, bool sync)
2822 {
2823         struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
2824         struct nfs4_layoutcommit_data *data;
2825         struct nfs_inode *nfsi = NFS_I(inode);
2826         loff_t end_pos;
2827         int status;
2828
2829         if (!pnfs_layoutcommit_outstanding(inode))
2830                 return 0;
2831
2832         dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
2833
2834         status = -EAGAIN;
2835         if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
2836                 if (!sync)
2837                         goto out;
2838                 status = wait_on_bit_lock_action(&nfsi->flags,
2839                                 NFS_INO_LAYOUTCOMMITTING,
2840                                 nfs_wait_bit_killable,
2841                                 TASK_KILLABLE);
2842                 if (status)
2843                         goto out;
2844         }
2845
2846         status = -ENOMEM;
2847         /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
2848         data = kzalloc(sizeof(*data), GFP_NOFS);
2849         if (!data)
2850                 goto clear_layoutcommitting;
2851
2852         status = 0;
2853         spin_lock(&inode->i_lock);
2854         if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
2855                 goto out_unlock;
2856
2857         INIT_LIST_HEAD(&data->lseg_list);
2858         pnfs_list_write_lseg(inode, &data->lseg_list);
2859
2860         end_pos = nfsi->layout->plh_lwb;
2861
2862         nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
2863         spin_unlock(&inode->i_lock);
2864
2865         data->args.inode = inode;
2866         data->cred = get_rpccred(nfsi->layout->plh_lc_cred);
2867         nfs_fattr_init(&data->fattr);
2868         data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
2869         data->res.fattr = &data->fattr;
2870         if (end_pos != 0)
2871                 data->args.lastbytewritten = end_pos - 1;
2872         else
2873                 data->args.lastbytewritten = U64_MAX;
2874         data->res.server = NFS_SERVER(inode);
2875
2876         if (ld->prepare_layoutcommit) {
2877                 status = ld->prepare_layoutcommit(&data->args);
2878                 if (status) {
2879                         put_rpccred(data->cred);
2880                         spin_lock(&inode->i_lock);
2881                         set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
2882                         if (end_pos > nfsi->layout->plh_lwb)
2883                                 nfsi->layout->plh_lwb = end_pos;
2884                         goto out_unlock;
2885                 }
2886         }
2887
2889         status = nfs4_proc_layoutcommit(data, sync);
2890 out:
2891         if (status)
2892                 mark_inode_dirty_sync(inode);
2893         dprintk("<-- %s status %d\n", __func__, status);
2894         return status;
2895 out_unlock:
2896         spin_unlock(&inode->i_lock);
2897         kfree(data);
2898 clear_layoutcommitting:
2899         pnfs_clear_layoutcommitting(inode);
2900         goto out;
2901 }
2902 EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);
2903
2904 int
2905 pnfs_generic_sync(struct inode *inode, bool datasync)
2906 {
2907         return pnfs_layoutcommit_inode(inode, true);
2908 }
2909 EXPORT_SYMBOL_GPL(pnfs_generic_sync);
2910
2911 struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
2912 {
2913         struct nfs4_threshold *thp;
2914
2915         thp = kzalloc(sizeof(*thp), GFP_NOFS);
2916         if (!thp) {
2917                 dprintk("%s mdsthreshold allocation failed\n", __func__);
2918                 return NULL;
2919         }
2920         return thp;
2921 }
2922
2923 #if IS_ENABLED(CONFIG_NFS_V4_2)
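/*
 * Ask the layout driver to prepare LAYOUTSTATS arguments and send them
 * to the server.  The NFS_INO_LAYOUTSTATS bit keeps at most one
 * LAYOUTSTATS RPC in flight per inode at a time.
 */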
2924 int
2925 pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
2926 {
2927         struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
2928         struct nfs_server *server = NFS_SERVER(inode);
2929         struct nfs_inode *nfsi = NFS_I(inode);
2930         struct nfs42_layoutstat_data *data;
2931         struct pnfs_layout_hdr *hdr;
2932         int status = 0;
2933
2934         if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
2935                 goto out;
2936
2937         if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS))
2938                 goto out;
2939
2940         if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
2941                 goto out;
2942
2943         spin_lock(&inode->i_lock);
2944         if (!NFS_I(inode)->layout) {
2945                 spin_unlock(&inode->i_lock);
2946                 goto out_clear_layoutstats;
2947         }
2948         hdr = NFS_I(inode)->layout;
2949         pnfs_get_layout_hdr(hdr);
2950         spin_unlock(&inode->i_lock);
2951
2952         data = kzalloc(sizeof(*data), gfp_flags);
2953         if (!data) {
2954                 status = -ENOMEM;
2955                 goto out_put;
2956         }
2957
2958         data->args.fh = NFS_FH(inode);
2959         data->args.inode = inode;
2960         status = ld->prepare_layoutstats(&data->args);
2961         if (status)
2962                 goto out_free;
2963
2964         status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);
2965
2966 out:
2967         dprintk("%s returns %d\n", __func__, status);
2968         return status;
2969
2970 out_free:
2971         kfree(data);
2972 out_put:
2973         pnfs_put_layout_hdr(hdr);
2974 out_clear_layoutstats:
2975         smp_mb__before_atomic();
2976         clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
2977         smp_mb__after_atomic();
2978         goto out;
2979 }
2980 EXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
2981 #endif
2982
2983 unsigned int layoutstats_timer;
2984 module_param(layoutstats_timer, uint, 0644);
2985 EXPORT_SYMBOL_GPL(layoutstats_timer);