/*
 * Copyright (c) 2016 Oracle. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ib_mr.h"

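/* Atomically move an MR from old_state to new_state with cmpxchg().
 * When a busy MR leaves FRMR_IS_INUSE, drop the connection's
 * i_fastreg_inuse_count and wake anyone waiting for the ring to drain.
 */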
static inline void
rds_transition_frwr_state(struct rds_ib_mr *ibmr,
                          enum rds_ib_fr_state old_state,
                          enum rds_ib_fr_state new_state)
{
        if (cmpxchg(&ibmr->u.frmr.fr_state,
                    old_state, new_state) == old_state &&
            old_state == FRMR_IS_INUSE) {
                /* enforce order of ibmr->u.frmr.fr_state update
                 * before decrementing i_fastreg_inuse_count
                 */
                smp_mb__before_atomic();
                atomic_dec(&ibmr->ic->i_fastreg_inuse_count);
                if (waitqueue_active(&rds_ib_ring_empty_wait))
                        wake_up(&rds_ib_ring_empty_wait);
        }
}

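/* Allocate an MR for a fastreg registration of up to npages pages.
 * Small requests are served from the 8K pool, larger ones from the
 * 1M pool; a pooled MR is reused when available, otherwise a fresh
 * one is allocated with ib_alloc_mr().
 */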
static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
                                           int npages)
{
        struct rds_ib_mr_pool *pool;
        struct rds_ib_mr *ibmr = NULL;
        struct rds_ib_frmr *frmr;
        int err = 0;

        if (npages <= RDS_MR_8K_MSG_SIZE)
                pool = rds_ibdev->mr_8k_pool;
        else
                pool = rds_ibdev->mr_1m_pool;

        ibmr = rds_ib_try_reuse_ibmr(pool);
        if (ibmr)
                return ibmr;

        ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
                            rdsibdev_to_node(rds_ibdev));
        if (!ibmr) {
                err = -ENOMEM;
                goto out_no_cigar;
        }

        frmr = &ibmr->u.frmr;
        frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG,
                               pool->max_pages);
        if (IS_ERR(frmr->mr)) {
                pr_warn("RDS/IB: %s failed to allocate MR\n", __func__);
                err = PTR_ERR(frmr->mr);
                goto out_no_cigar;
        }

        ibmr->pool = pool;
        if (pool->pool_type == RDS_IB_MR_8K_POOL)
                rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
        else
                rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);

        if (atomic_read(&pool->item_count) > pool->max_items_soft)
                pool->max_items_soft = pool->max_items;

        frmr->fr_state = FRMR_IS_FREE;
        init_waitqueue_head(&frmr->fr_inv_done);
        init_waitqueue_head(&frmr->fr_reg_done);
        return ibmr;

out_no_cigar:
        kfree(ibmr);
        atomic_dec(&pool->item_count);
        return ERR_PTR(err);
}

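/* Hand an MR back to its pool: MRs whose mapping must not be reused go
 * on the drop list, the rest on the free list. A pool flush is kicked
 * off once too many pages are pinned or too many MRs are dirty.
 */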
static void rds_ib_free_frmr(struct rds_ib_mr *ibmr, bool drop)
{
        struct rds_ib_mr_pool *pool = ibmr->pool;

        if (drop)
                llist_add(&ibmr->llnode, &pool->drop_list);
        else
                llist_add(&ibmr->llnode, &pool->free_list);
        atomic_add(ibmr->sg_len, &pool->free_pinned);
        atomic_inc(&pool->dirty_count);

        /* If we've pinned too many pages, request a flush */
        if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
            atomic_read(&pool->dirty_count) >= pool->max_items / 5)
                queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
}

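/* Post an IB_WR_REG_MR work request that registers the MR's DMA-mapped
 * scatterlist with the HCA, then wait for the registration to complete.
 * The i_fastreg_wrs counter throttles the number of registration and
 * invalidation WRs outstanding on the queue pair.
 */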
static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
{
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
        struct ib_reg_wr reg_wr;
        int ret, off = 0;

        while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                cpu_relax();
        }

        ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len,
                                &off, PAGE_SIZE);
        if (unlikely(ret != ibmr->sg_dma_len))
                return ret < 0 ? ret : -EINVAL;

        if (cmpxchg(&frmr->fr_state,
                    FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
                return -EBUSY;

        atomic_inc(&ibmr->ic->i_fastreg_inuse_count);

        /* Perform a WR for the fast_reg_mr. Each individual page
         * in the sg list is added to the fast reg page list and placed
         * inside the fast_reg_mr WR. The key used is a rolling 8-bit
         * counter, which should guarantee uniqueness.
         */
        ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
        frmr->fr_reg = true;

        memset(&reg_wr, 0, sizeof(reg_wr));
        reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
        reg_wr.wr.opcode = IB_WR_REG_MR;
        reg_wr.wr.num_sge = 0;
        reg_wr.mr = frmr->mr;
        reg_wr.key = frmr->mr->rkey;
        reg_wr.access = IB_ACCESS_LOCAL_WRITE |
                        IB_ACCESS_REMOTE_READ |
                        IB_ACCESS_REMOTE_WRITE;
        reg_wr.wr.send_flags = IB_SEND_SIGNALED;

        ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
        if (unlikely(ret)) {
                /* Failure here can be because of -ENOMEM as well */
                rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);

                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                if (printk_ratelimit())
                        pr_warn("RDS/IB: %s returned error(%d)\n",
                                __func__, ret);
                goto out;
        }

        /* Wait for the registration to complete in order to prevent an
         * invalid access error resulting from the memory region being
         * accessed while its registration is still pending.
         */
        wait_event(frmr->fr_reg_done, !frmr->fr_reg);

out:
        return ret;
}

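/* DMA-map a scatterlist onto an MR and register it with the HCA. The
 * mapping must be page aligned: only the first segment may start off a
 * page boundary and only the last may end off one.
 */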
static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
                           struct rds_ib_mr_pool *pool,
                           struct rds_ib_mr *ibmr,
                           struct scatterlist *sg, unsigned int sg_len)
{
        struct ib_device *dev = rds_ibdev->dev;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
        int i;
        u32 len;
        int ret = 0;

        /* Tear down the old ibmr mapping here and fill it up with the
         * new sg values
         */
        rds_ib_teardown_mr(ibmr);

        ibmr->sg = sg;
        ibmr->sg_len = sg_len;
        ibmr->sg_dma_len = 0;
        frmr->sg_byte_len = 0;
        WARN_ON(ibmr->sg_dma_len);
        ibmr->sg_dma_len = ib_dma_map_sg(dev, ibmr->sg, ibmr->sg_len,
                                         DMA_BIDIRECTIONAL);
        if (unlikely(!ibmr->sg_dma_len)) {
                pr_warn("RDS/IB: %s failed!\n", __func__);
                return -EBUSY;
        }

        frmr->sg_byte_len = 0;
        frmr->dma_npages = 0;
        len = 0;

        /* Only the first segment may start off a page boundary and only
         * the last may end off one; anything else cannot be covered by a
         * single fastreg MR and is rejected.
         */
        ret = -EINVAL;
        for (i = 0; i < ibmr->sg_dma_len; ++i) {
                unsigned int dma_len = sg_dma_len(&ibmr->sg[i]);
                u64 dma_addr = sg_dma_address(&ibmr->sg[i]);

                frmr->sg_byte_len += dma_len;
                if (dma_addr & ~PAGE_MASK) {
                        if (i > 0)
                                goto out_unmap;
                        else
                                ++frmr->dma_npages;
                }

                if ((dma_addr + dma_len) & ~PAGE_MASK) {
                        if (i < ibmr->sg_dma_len - 1)
                                goto out_unmap;
                        else
                                ++frmr->dma_npages;
                }

                len += dma_len;
        }
        frmr->dma_npages += len >> PAGE_SHIFT;

        if (frmr->dma_npages > ibmr->pool->max_pages) {
                ret = -EMSGSIZE;
                goto out_unmap;
        }

        ret = rds_ib_post_reg_frmr(ibmr);
        if (ret)
                goto out_unmap;

        if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
                rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
        else
                rds_ib_stats_inc(s_ib_rdma_mr_1m_used);

        return ret;

out_unmap:
        ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len,
                        DMA_BIDIRECTIONAL);
        ibmr->sg_dma_len = 0;
        return ret;
}

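/* Post an IB_WR_LOCAL_INV work request to invalidate the MR's rkey and
 * wait until the HCA has completed it, at which point the MR is either
 * FRMR_IS_FREE or FRMR_IS_STALE.
 */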
static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
{
        struct ib_send_wr *s_wr;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
        struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id;
        int ret = -EINVAL;

        if (!i_cm_id || !i_cm_id->qp || !frmr->mr)
                goto out;

        if (frmr->fr_state != FRMR_IS_INUSE)
                goto out;

        while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                cpu_relax();
        }

        frmr->fr_inv = true;
        s_wr = &frmr->fr_wr;

        memset(s_wr, 0, sizeof(*s_wr));
        s_wr->wr_id = (unsigned long)(void *)ibmr;
        s_wr->opcode = IB_WR_LOCAL_INV;
        s_wr->ex.invalidate_rkey = frmr->mr->rkey;
        s_wr->send_flags = IB_SEND_SIGNALED;

        ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
        if (unlikely(ret)) {
                rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
                frmr->fr_inv = false;
                /* enforce order of frmr->fr_inv update
                 * before incrementing i_fastreg_wrs
                 */
                smp_mb__before_atomic();
                atomic_inc(&ibmr->ic->i_fastreg_wrs);
                pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
                goto out;
        }

        /* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to
         * 1) avoid a silly bouncing between "clean_list" and "drop_list"
         *    triggered by function "rds_ib_reg_frmr" as it releases frmr
         *    regions whose state is not "FRMR_IS_FREE" right away.
         * 2) prevent an invalid access error in a race
         *    from a pending "IB_WR_LOCAL_INV" operation
         *    with a teardown ("dma_unmap_sg", "put_page")
         *    and de-registration ("ib_dereg_mr") of the corresponding
         *    memory region.
         */
        wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);

out:
        return ret;
}

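/* Completion handler for registration and invalidation WRs. A failed
 * completion marks the MR stale and tears the connection down; a
 * completed invalidation frees the MR. Waiters on fr_inv_done and
 * fr_reg_done are woken, and the WR credit is returned.
 */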
void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
{
        struct rds_ib_mr *ibmr = (void *)(unsigned long)wc->wr_id;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;

        if (wc->status != IB_WC_SUCCESS) {
                rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
                if (rds_conn_up(ic->conn))
                        rds_ib_conn_error(ic->conn,
                                          "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
                                          &ic->conn->c_laddr,
                                          &ic->conn->c_faddr,
                                          wc->status,
                                          ib_wc_status_msg(wc->status),
                                          wc->vendor_err);
        }

        if (frmr->fr_inv) {
                rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE);
                frmr->fr_inv = false;
                wake_up(&frmr->fr_inv_done);
        }

        if (frmr->fr_reg) {
                frmr->fr_reg = false;
                wake_up(&frmr->fr_reg_done);
        }

        /* enforce order of frmr->{fr_reg,fr_inv} update
         * before incrementing i_fastreg_wrs
         */
        smp_mb__before_atomic();
        atomic_inc(&ic->i_fastreg_wrs);
}

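/* Invalidate every MR on the unmap list, tear down their DMA mappings
 * and unpin their pages, then de-allocate MRs until the freeing goal is
 * met; stale MRs are always de-allocated and in-use MRs are kept.
 */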
void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
                       unsigned long *unpinned, unsigned int goal)
{
        struct rds_ib_mr *ibmr, *next;
        struct rds_ib_frmr *frmr;
        int ret = 0, ret2;
        unsigned int freed = *nfreed;

        /* Post a LOCAL_INV WR for every MR that still has a DMA mapping */
        list_for_each_entry(ibmr, list, unmap_list) {
                if (ibmr->sg_dma_len) {
                        ret2 = rds_ib_post_inv(ibmr);
                        if (ret2 && !ret)
                                ret = ret2;
                }
        }

        if (ret)
                pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);

        /* Now we can destroy the DMA mapping and unpin any pages */
        list_for_each_entry_safe(ibmr, next, list, unmap_list) {
                *unpinned += ibmr->sg_len;
                frmr = &ibmr->u.frmr;
                __rds_ib_teardown_mr(ibmr);
                if (freed < goal || frmr->fr_state == FRMR_IS_STALE) {
                        /* Don't de-allocate if the MR is not free yet */
                        if (frmr->fr_state == FRMR_IS_INUSE)
                                continue;

                        if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
                                rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
                        else
                                rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
                        list_del(&ibmr->unmap_list);
                        if (frmr->mr)
                                ib_dereg_mr(frmr->mr);
                        kfree(ibmr);
                        freed++;
                }
        }
        *nfreed = freed;
}

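/* Entry point for fastreg registration: pick up (or allocate) a free
 * MR, map and register the caller's scatterlist on it, and return the
 * rkey through *key. MRs that come back from the pool in a non-free
 * state are dropped and the allocation is retried.
 */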
struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
                                  struct rds_ib_connection *ic,
                                  struct scatterlist *sg,
                                  unsigned long nents, u32 *key)
{
        struct rds_ib_mr *ibmr = NULL;
        struct rds_ib_frmr *frmr;
        int ret;

        if (!ic) {
                /* TODO: Add FRWR support for RDS_GET_MR using proxy qp */
                return ERR_PTR(-EOPNOTSUPP);
        }

        do {
                if (ibmr)
                        rds_ib_free_frmr(ibmr, true);
                ibmr = rds_ib_alloc_frmr(rds_ibdev, nents);
                if (IS_ERR(ibmr))
                        return ibmr;
                frmr = &ibmr->u.frmr;
        } while (frmr->fr_state != FRMR_IS_FREE);

        ibmr->ic = ic;
        ibmr->device = rds_ibdev;
        ret = rds_ib_map_frmr(rds_ibdev, ibmr->pool, ibmr, sg, nents);
        if (ret == 0) {
                *key = frmr->mr->rkey;
        } else {
                rds_ib_free_frmr(ibmr, false);
                ibmr = ERR_PTR(ret);
        }

        return ibmr;
}

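/* Queue an MR for release back to its pool: stale MRs go on the drop
 * list, everything else on the free list.
 */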
void rds_ib_free_frmr_list(struct rds_ib_mr *ibmr)
{
        struct rds_ib_mr_pool *pool = ibmr->pool;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;

        if (frmr->fr_state == FRMR_IS_STALE)
                llist_add(&ibmr->llnode, &pool->drop_list);
        else
                llist_add(&ibmr->llnode, &pool->free_list);
}