1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
4 /* Copyright (c) 2008-2019, IBM Corporation */
7 #include <rdma/ib_verbs.h>
8 #include <rdma/ib_umem.h>
9 #include <linux/dma-mapping.h>
10 #include <linux/slab.h>
11 #include <linux/sched/mm.h>
12 #include <linux/resource.h>
17 /* Stag lookup is based on its index part only (24 bits). */
/*
 * Largest usable STag index. In this file it serves as the upper bound of
 * the XA_LIMIT() range handed to xa_alloc_cyclic() and as the mask applied
 * to the random start value used for that allocation.
 */
18 #define SIW_STAG_MAX_INDEX 0x00ffffff
21 * The code avoids special Stag of zero and tries to randomize
22 * STag values between 1 and SIW_STAG_MAX_INDEX.
/*
 * siw_mem_add()
 *
 * Stores memory object @m in the device's mem_xa xarray using
 * xa_alloc_cyclic(). The allocated index is restricted to
 * [1, SIW_STAG_MAX_INDEX], and a random value masked to that width is
 * passed as the "next" hint.
 *
 * @sdev: device whose mem_xa receives the object
 * @m:    memory object to insert
 *
 * NOTE(review): parts of this function are not visible in this view
 * (declarations of id/next, the tail of the xa_alloc_cyclic() call, the
 * STag assignment and the return statements) - confirm return values and
 * the exact STag encoding against the complete source.
 */
24 int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
26 struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
/* Pick a random starting hint and clamp it to the STag index width. */
29 get_random_bytes(&next, 4);
30 next &= SIW_STAG_MAX_INDEX;
32 if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
36 /* Set the STag index part */
39 siw_dbg_mem(m, "new MEM object\n");
47 * resolves memory from stag given by id. might be called from:
48 * o process context before sending out of sgl, or
49 * o in softirq when resolving target memory
/*
 * Loads the entry stored under @stag_index from sdev->mem_xa and tries to
 * take a reference on it with kref_get_unless_zero().
 *
 * @sdev:       device whose mem_xa is searched
 * @stag_index: index used for the xarray lookup
 *
 * NOTE(review): the statements following the reference check (and any
 * locking around the lookup) are not visible here; presumably the object
 * is returned when the reference was obtained and NULL otherwise - confirm.
 */
51 struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
56 mem = xa_load(&sdev->mem_xa, stag_index);
/* Accept the object only if it exists and its refcount could be raised. */
57 if (likely(mem && kref_get_unless_zero(&mem->ref))) {
/*
 * Releases the resources referenced by @umem: the underlying ib_umem
 * (umem->base_mem), every per-chunk page list, and the page_chunk array.
 *
 * NOTE(review): lines between the visible statements are missing in this
 * view (e.g. any guard before ib_umem_release() and whether @umem itself
 * is freed at the end) - confirm against the complete source.
 */
66 void siw_umem_release(struct siw_umem *umem)
68 int i, num_pages = umem->num_pages;
71 ib_umem_release(umem->base_mem);
/*
 * Walk the chunk array: each iteration frees one chunk's page list and
 * accounts for PAGES_PER_CHUNK pages until num_pages is used up.
 */
73 for (i = 0; num_pages > 0; i++) {
74 kfree(umem->page_chunk[i].plist);
75 num_pages -= PAGES_PER_CHUNK;
77 kfree(umem->page_chunk);
/*
 * Allocates a zeroed siw_mem object for memory region @mr, fills it in,
 * inserts it into the device's mem_xa via xa_alloc_cyclic() (index range
 * [1, SIW_STAG_MAX_INDEX], random start hint) and publishes the resulting
 * STag as both lkey and rkey of mr->base_mr.
 *
 * @mr:      memory region that receives the keys
 * @pd:      protection domain; its device selects the siw device
 * @mem_obj: opaque backing object stored in mem->mem_obj
 * @start:   start address of the region
 * @len:     length of the region
 * @rights:  access rights; only bits in IWARP_ACCESS_MASK are kept
 *
 * NOTE(review): several statements are not visible here (allocation
 * failure check, remaining field assignments, the tail of the
 * xa_alloc_cyclic() call with its error handling, the mem->stag
 * assignment, return statements) - confirm against the complete source.
 */
81 int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
82 u64 start, u64 len, int rights)
84 struct siw_device *sdev = to_siw_dev(pd->device);
85 struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
86 struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
92 mem->mem_obj = mem_obj;
98 mem->perms = rights & IWARP_ACCESS_MASK;
/* Random start hint for the index allocation, clamped to the index width. */
101 get_random_bytes(&next, 4);
102 next &= SIW_STAG_MAX_INDEX;
104 if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
111 /* Set the STag index part */
/* Local and remote key are the same STag value. */
113 mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;
/*
 * Removes the memory object of @mr from its device's mem_xa. The xarray
 * index is the STag shifted right by 8 bits; a warning is raised if the
 * erased entry is not the object attached to @mr.
 *
 * NOTE(review): the statements that mark the STag invalid and whatever
 * follows the WARN_ON() are not visible in this view - confirm.
 */
118 void siw_mr_drop_mem(struct siw_mr *mr)
120 struct siw_mem *mem = mr->mem, *found;
124 /* make STag invalid visible asap */
127 found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
/* The entry removed from the xarray must be the one this MR owns. */
128 WARN_ON(found != mem);
/*
 * Tears down the siw_mem that embeds @ref. For objects that are not memory
 * windows and have a backing mem_obj, a non-PBL object has its umem
 * released through siw_umem_release().
 *
 * @ref: kref embedded in the siw_mem to be freed; presumably this function
 *       is used as the kref release callback - confirm against callers.
 *
 * NOTE(review): the PBL branch and the final freeing of the object are not
 * visible in this view.
 */
132 void siw_free_mem(struct kref *ref)
134 struct siw_mem *mem = container_of(ref, struct siw_mem, ref);
136 siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");
/* Only non-MW objects with a backing object have something to release. */
138 if (!mem->is_mw && mem->mem_obj) {
139 if (mem->is_pbl == 0)
140 siw_umem_release(mem->umem);
150 * Check protection domain, STAG state, access permissions and
151 * address range for memory object.
153 * @pd: Protection Domain memory should belong to
154 * @mem: memory to be checked
155 * @addr: starting addr of mem
156 * @perms: requested access permissions
157 * @len: len of memory interval to be checked
/*
 * Validates an access to memory object @mem.
 *
 * Visible failure results, in the order they are checked:
 *   -E_STAG_INVALID  mem->stag_valid is not set
 *   -E_PD_MISMATCH   reported with a "PD mismatch" message
 *   -E_ACCESS_PERM   mem->perms does not cover all bits of @perms
 *   -E_BASE_BOUNDS   [addr, addr + len) is not inside
 *                    [mem->va, mem->va + mem->len)
 *
 * NOTE(review): the condition guarding the PD mismatch branch and the
 * success return are not visible in this view; presumably mem->pd is
 * compared with @pd and E_ACCESS_OK is returned on success - confirm.
 * NOTE(review): the interval check adds @len to @addr and mem->len to
 * mem->va with no overflow guard visible here - verify whether callers
 * bound these values.
 */
160 int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
161 enum ib_access_flags perms, int len)
163 if (!mem->stag_valid) {
164 siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
165 return -E_STAG_INVALID;
168 siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
169 return -E_PD_MISMATCH;
172 * check access permissions
/* Fails when at least one requested permission bit is not granted. */
174 if ((mem->perms & perms) < perms) {
175 siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
177 return -E_ACCESS_PERM;
180 * Check if access falls into valid memory interval.
182 if (addr < mem->va || addr + len > mem->va + mem->len) {
183 siw_dbg_pd(pd, "MEM interval len %d\n", len);
184 siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
185 (void *)(uintptr_t)addr,
186 (void *)(uintptr_t)(addr + len));
187 siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
188 (void *)(uintptr_t)mem->va,
189 (void *)(uintptr_t)(mem->va + mem->len),
192 return -E_BASE_BOUNDS;
200 * Check SGE for access rights in given interval
202 * @pd: Protection Domain memory should belong to
203 * @sge: SGE to be checked
204 * @mem: location of memory reference within array
205 * @perms: requested access permissions
206 * @off: starting offset in SGE
207 * @len: len of memory interval to be checked
209 * NOTE: Function references SGE's memory object (mem->obj)
210 * if not yet done. New reference is kept if check went ok and
211 * released if check failed. If mem->obj is already valid, no new
212 * lookup is being done and mem is not released if check fails.
/*
 * Validates an access of @len bytes at offset @off within @sge.
 *
 * Visible steps:
 *  - compare @len + @off against sge->length,
 *  - look the memory object up through siw_mem_id2obj() using the index
 *    part of sge->lkey (lkey >> 8); an unknown STag gives -E_STAG_INVALID,
 *  - reject an object whose stag differs from sge->lkey (-E_STAG_INVALID),
 *  - delegate the remaining checks to siw_check_mem() for address
 *    sge->laddr + @off.
 *
 * NOTE(review): the branches that decide when the lookup happens, the
 * result of a failed length check, the storing of the new reference into
 * @mem and its release on failure are not visible in this view - confirm
 * against the complete source.
 */
214 int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
215 enum ib_access_flags perms, u32 off, int len)
217 struct siw_device *sdev = to_siw_dev(pd->device);
218 struct siw_mem *new = NULL;
219 int rv = E_ACCESS_OK;
221 if (len + off > sge->length) {
/* The xarray is keyed by the STag index part, i.e. the lkey without its low 8 bits. */
226 new = siw_mem_id2obj(sdev, sge->lkey >> 8);
227 if (unlikely(!new)) {
228 siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
229 rv = -E_STAG_INVALID;
234 /* Check if user re-registered with different STag key */
235 if (unlikely((*mem)->stag != sge->lkey)) {
236 siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
237 rv = -E_STAG_INVALID;
240 rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
/*
 * Drops the memory references held by @wqe via siw_unref_mem_sgl(),
 * with the number of entries depending on the operation:
 *  - for the send/read style opcodes listed below, sqe.num_sge entries,
 *    but only when the WQE is not flagged SIW_WQE_INLINE,
 *  - rqe.num_sge entries in a further branch,
 *  - a single entry for SIW_OP_READ_RESPONSE.
 *
 * @wqe: work queue element whose mem[] references are released
 * @op:  opcode of the operation that was carried out
 *
 * NOTE(review): the switch statement itself and several case labels are
 * not visible in this view (including the label guarding the rqe branch);
 * presumably the dispatch is on @op - confirm against the complete source.
 */
254 void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
259 case SIW_OP_SEND_WITH_IMM:
260 case SIW_OP_SEND_REMOTE_INV:
262 case SIW_OP_READ_LOCAL_INV:
/* Inline WQEs are skipped here; only the non-inline case drops references. */
263 if (!(wqe->sqe.flags & SIW_WQE_INLINE))
264 siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
268 siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
271 case SIW_OP_READ_RESPONSE:
272 siw_unref_mem_sgl(wqe->mem, 1);
277 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
278 * do not hold memory references
/*
 * Invalidates the memory object addressed by @stag.
 *
 * The object is looked up with siw_mem_id2obj() using the STag index part
 * (@stag >> 8). An unknown STag and an object belonging to a different
 * protection domain than @pd are both reported through debug messages.
 *
 * @pd:   protection domain the STag must belong to
 * @stag: STag to invalidate
 *
 * NOTE(review): the return values, the statement that clears the valid
 * state and the release of the reference taken by the lookup are not
 * visible in this view - confirm against the complete source.
 */
284 int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
286 struct siw_device *sdev = to_siw_dev(pd->device);
287 struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
290 if (unlikely(!mem)) {
291 siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
294 if (unlikely(mem->pd != pd)) {
295 siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
300 * Per RDMA verbs definition, an STag may already be in invalid
301 * state if invalidation is requested. So no state check here.
305 siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
312 * Gets physical address backed by PBL element. Address is referenced
313 * by linear byte offset into list of variably sized PB elements.
314 * Optionally, provides remaining len within current element, and
315 * current PBL index for later resume at same element.
/*
 * Translates linear byte offset @off into the address inside the PBL
 * element that contains it.
 *
 * The scan starts at element *@idx when @idx is given, otherwise at
 * element 0, and stops at the first element whose end
 * (pbl_off + size) lies beyond @off. The result is that element's address
 * plus the offset of @off within it.
 *
 * @pbl: page buffer list to search
 * @off: linear byte offset into the list
 * @len: receives the bytes remaining in the matching element
 * @idx: optional start index for the scan
 *
 * NOTE(review): the guards around the *len store, the write-back of the
 * index, the loop increment and the not-found result are not visible in
 * this view - confirm against the complete source.
 */
317 dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
319 int i = idx ? *idx : 0;
321 while (i < pbl->num_buf) {
322 struct siw_pble *pble = &pbl->pbe[i];
/* First element that ends after @off is the one holding it. */
324 if (pble->pbl_off + pble->size > off) {
325 u64 pble_off = off - pble->pbl_off;
328 *len = pble->size - pble_off;
332 return pble->addr + pble_off;
/*
 * Allocates a zeroed siw_pbl with room for @num_buf entries in its
 * trailing pbe[] array (sized with struct_size()) and records @num_buf
 * in max_buf.
 *
 * Visible error results: ERR_PTR(-EINVAL) and ERR_PTR(-ENOMEM).
 *
 * NOTE(review): the conditions guarding both error returns and the final
 * return are not visible in this view; presumably -EINVAL rejects an
 * unusable @num_buf and -ENOMEM reports allocation failure - confirm.
 */
341 struct siw_pbl *siw_pbl_alloc(u32 num_buf)
346 return ERR_PTR(-EINVAL);
348 pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL);
350 return ERR_PTR(-ENOMEM);
352 pbl->max_buf = num_buf;
/*
 * Builds a siw_umem describing a user memory range beginning at @start.
 *
 * Visible steps:
 *  - derive the page-aligned start address and the number of pages and
 *    chunks needed for the range,
 *  - allocate the siw_umem and its page_chunk array,
 *  - obtain the underlying ib_umem through ib_umem_get(),
 *  - iterate the pages of the ib_umem's scatter table and store them in
 *    per-chunk page lists of at most PAGES_PER_CHUNK entries.
 *
 * Visible error results: ERR_PTR(-EINVAL) and ERR_PTR(-ENOMEM).
 *
 * NOTE(review): the remaining parameters of the signature (len and rights
 * are used below), the conditions of the error returns, the inner
 * page-fill loop and the end of the function are not visible in this
 * view; the trailing siw_umem_release() call is presumably the error
 * unwind path - confirm against the complete source.
 */
357 struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
360 struct siw_umem *umem;
361 struct ib_umem *base_mem;
362 struct sg_page_iter sg_iter;
363 struct sg_table *sgt;
365 int num_pages, num_chunks, i, rv = 0;
368 return ERR_PTR(-EINVAL);
/* Page-aligned start of the range and the number of pages it touches. */
370 first_page_va = start & PAGE_MASK;
371 num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
372 num_chunks = (num_pages >> CHUNK_SHIFT) + 1;
374 umem = kzalloc(sizeof(*umem), GFP_KERNEL);
376 return ERR_PTR(-ENOMEM);
379 kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
380 if (!umem->page_chunk) {
384 base_mem = ib_umem_get(base_dev, start, len, rights);
385 if (IS_ERR(base_mem)) {
386 rv = PTR_ERR(base_mem);
387 siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv);
390 umem->fp_addr = first_page_va;
391 umem->base_mem = base_mem;
/* Page iteration runs over the scatter table embedded in the ib_umem. */
393 sgt = &base_mem->sgt_append.sgt;
394 __sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);
396 if (!__sg_page_iter_next(&sg_iter)) {
/* One page list per chunk, each holding at most PAGES_PER_CHUNK pages. */
400 for (i = 0; num_pages > 0; i++) {
401 int nents = min_t(int, num_pages, PAGES_PER_CHUNK);
402 struct page **plist =
403 kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
409 umem->page_chunk[i].plist = plist;
411 *plist = sg_page_iter_page(&sg_iter);
415 if (!__sg_page_iter_next(&sg_iter))
421 siw_umem_release(umem);