// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * AF_XDP user-space access library.
 *
 * Copyright(c) 2018 - 2019 Intel Corporation.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <asm/barrier.h>
#include <linux/compiler.h>
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/sockios.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <linux/if_link.h>

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

#include "xsk.h"

#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
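/* Internal state. A umem can be shared by several AF_XDP sockets: each
 * (ifindex, queue_id) pair gets one xsk_ctx holding the fill and completion
 * rings, and each xsk_socket carries its own RX/TX rings.
 */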
struct xsk_umem {
	struct xsk_ring_prod *fill_save;
	struct xsk_ring_cons *comp_save;
	char *umem_area;
	struct xsk_umem_config config;
	int fd;
	int refcount;
	struct list_head ctx_list;
	bool rx_ring_setup_done;
	bool tx_ring_setup_done;
};

struct xsk_ctx {
	struct xsk_ring_prod *fill;
	struct xsk_ring_cons *comp;
	__u32 queue_id;
	struct xsk_umem *umem;
	int refcount;
	int ifindex;
	struct list_head list;
};

struct xsk_socket {
	struct xsk_ring_cons *rx;
	struct xsk_ring_prod *tx;
	struct xsk_ctx *ctx;
	struct xsk_socket_config config;
	int fd;
};
int xsk_umem__fd(const struct xsk_umem *umem)
{
	return umem ? umem->fd : -EINVAL;
}

int xsk_socket__fd(const struct xsk_socket *xsk)
{
	return xsk ? xsk->fd : -EINVAL;
}

static bool xsk_page_aligned(void *buffer)
{
	unsigned long addr = (unsigned long)buffer;

	return !(addr & (getpagesize() - 1));
}
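/* The config helpers apply the library defaults when the caller passes a
 * NULL configuration, otherwise they copy the user-supplied values.
 */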
static void xsk_set_umem_config(struct xsk_umem_config *cfg,
				const struct xsk_umem_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
		cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
		cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
		return;
	}

	cfg->fill_size = usr_cfg->fill_size;
	cfg->comp_size = usr_cfg->comp_size;
	cfg->frame_size = usr_cfg->frame_size;
	cfg->frame_headroom = usr_cfg->frame_headroom;
	cfg->flags = usr_cfg->flags;
}
static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
				     const struct xsk_socket_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->bind_flags = 0;
		return 0;
	}

	cfg->rx_size = usr_cfg->rx_size;
	cfg->tx_size = usr_cfg->tx_size;
	cfg->bind_flags = usr_cfg->bind_flags;

	return 0;
}
static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
{
	socklen_t optlen;
	int err;

	optlen = sizeof(*off);
	err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
	if (err)
		return err;

	if (optlen == sizeof(*off))
		return 0;

	return -EINVAL;
}
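/* Size the fill and completion rings via setsockopt() and mmap() them into
 * user space. The producer/consumer/descriptor offsets inside each mapping
 * come from XDP_MMAP_OFFSETS.
 */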
static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
				 struct xsk_ring_prod *fill,
				 struct xsk_ring_cons *comp)
{
	struct xdp_mmap_offsets off;
	void *map;
	int err;

	err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
			 &umem->config.fill_size,
			 sizeof(umem->config.fill_size));
	if (err)
		return -errno;

	err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
			 &umem->config.comp_size,
			 sizeof(umem->config.comp_size));
	if (err)
		return -errno;

	err = xsk_get_mmap_offsets(fd, &off);
	if (err)
		return -errno;

	map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
		   XDP_UMEM_PGOFF_FILL_RING);
	if (map == MAP_FAILED)
		return -errno;

	fill->mask = umem->config.fill_size - 1;
	fill->size = umem->config.fill_size;
	fill->producer = map + off.fr.producer;
	fill->consumer = map + off.fr.consumer;
	fill->flags = map + off.fr.flags;
	fill->ring = map + off.fr.desc;
	fill->cached_cons = umem->config.fill_size;

	map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
		   XDP_UMEM_PGOFF_COMPLETION_RING);
	if (map == MAP_FAILED) {
		err = -errno;
		goto out_mmap;
	}

	comp->mask = umem->config.comp_size - 1;
	comp->size = umem->config.comp_size;
	comp->producer = map + off.cr.producer;
	comp->consumer = map + off.cr.consumer;
	comp->flags = map + off.cr.flags;
	comp->ring = map + off.cr.desc;

	return 0;

out_mmap:
	/* "map" holds MAP_FAILED from the completion ring mmap at this point,
	 * so recover the fill ring mapping from fill->ring before unmapping.
	 */
	munmap(fill->ring - off.fr.desc,
	       off.fr.desc + umem->config.fill_size * sizeof(__u64));
	return err;
}
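/* Register a user-supplied memory area as a umem on a new AF_XDP socket and
 * map its fill and completion rings. The ring pointers are saved so that the
 * first xsk_socket__create() on this umem can reuse them.
 */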
int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area,
		     __u64 size, struct xsk_ring_prod *fill,
		     struct xsk_ring_cons *comp,
		     const struct xsk_umem_config *usr_config)
{
	struct xdp_umem_reg mr;
	struct xsk_umem *umem;
	int err;

	if (!umem_area || !umem_ptr || !fill || !comp)
		return -EFAULT;
	if (!size && !xsk_page_aligned(umem_area))
		return -EINVAL;

	umem = calloc(1, sizeof(*umem));
	if (!umem)
		return -ENOMEM;

	umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
	if (umem->fd < 0) {
		err = -errno;
		goto out_umem_alloc;
	}

	umem->umem_area = umem_area;
	INIT_LIST_HEAD(&umem->ctx_list);
	xsk_set_umem_config(&umem->config, usr_config);

	memset(&mr, 0, sizeof(mr));
	mr.addr = (uintptr_t)umem_area;
	mr.len = size;
	mr.chunk_size = umem->config.frame_size;
	mr.headroom = umem->config.frame_headroom;
	mr.flags = umem->config.flags;

	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
	if (err) {
		err = -errno;
		goto out_socket;
	}

	err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
	if (err)
		goto out_socket;

	umem->fill_save = fill;
	umem->comp_save = comp;
	*umem_ptr = umem;
	return 0;

out_socket:
	close(umem->fd);
out_umem_alloc:
	free(umem);
	return err;
}
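/* Helpers for managing the XDP program and the XSKMAP used to redirect
 * traffic into AF_XDP sockets.
 */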
bool xsk_is_in_mode(u32 ifindex, int mode)
{
	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
	int ret;

	ret = bpf_xdp_query(ifindex, mode, &opts);
	if (ret) {
		printf("XDP mode query returned error %s\n", strerror(errno));
		return false;
	}

	if (mode == XDP_FLAGS_DRV_MODE)
		return opts.attach_mode == XDP_ATTACHED_DRV;
	else if (mode == XDP_FLAGS_SKB_MODE)
		return opts.attach_mode == XDP_ATTACHED_SKB;

	return false;
}

int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags)
{
	int prog_fd;

	prog_fd = bpf_program__fd(prog);
	return bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
}

void xsk_detach_xdp_program(int ifindex, u32 xdp_flags)
{
	bpf_xdp_detach(ifindex, xdp_flags, NULL);
}
void xsk_clear_xskmap(struct bpf_map *map)
{
	u32 index = 0;
	int map_fd;

	map_fd = bpf_map__fd(map);
	bpf_map_delete_elem(map_fd, &index);
}

int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk)
{
	int map_fd, sock_fd;
	u32 index = 0;

	map_fd = bpf_map__fd(map);
	sock_fd = xsk_socket__fd(xsk);

	return bpf_map_update_elem(map_fd, &index, &sock_fd, 0);
}
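/* A ctx is looked up by (ifindex, queue_id) and reference counted; it is
 * unmapped and freed only when the last socket using it is gone.
 */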
static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
				   __u32 queue_id)
{
	struct xsk_ctx *ctx;

	if (list_empty(&umem->ctx_list))
		return NULL;

	list_for_each_entry(ctx, &umem->ctx_list, list) {
		if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
			ctx->refcount++;
			return ctx;
		}
	}

	return NULL;
}
static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
{
	struct xsk_umem *umem = ctx->umem;
	struct xdp_mmap_offsets off;
	int err;

	if (--ctx->refcount)
		return;

	if (!unmap)
		goto out_free;

	err = xsk_get_mmap_offsets(umem->fd, &off);
	if (err)
		goto out_free;

	munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
	       sizeof(__u64));
	munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size *
	       sizeof(__u64));

out_free:
	list_del(&ctx->list);
	free(ctx);
}
static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
				      struct xsk_umem *umem, int ifindex,
				      __u32 queue_id,
				      struct xsk_ring_prod *fill,
				      struct xsk_ring_cons *comp)
{
	struct xsk_ctx *ctx;
	int err;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx)
		return NULL;

	if (!umem->fill_save) {
		err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
		if (err) {
			free(ctx);
			return NULL;
		}
	} else if (umem->fill_save != fill || umem->comp_save != comp) {
		/* Copy over rings to new structs. */
		memcpy(fill, umem->fill_save, sizeof(*fill));
		memcpy(comp, umem->comp_save, sizeof(*comp));
	}

	ctx->ifindex = ifindex;
	ctx->refcount = 1;
	ctx->umem = umem;
	ctx->queue_id = queue_id;

	ctx->fill = fill;
	ctx->comp = comp;
	list_add(&ctx->list, &umem->ctx_list);
	return ctx;
}
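/* Create an AF_XDP socket, optionally sharing an already registered umem.
 * The first socket reuses the umem's fd and saved fill/completion rings;
 * subsequent sockets get their own fd and bind with XDP_SHARED_UMEM.
 */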
int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
			      int ifindex,
			      __u32 queue_id, struct xsk_umem *umem,
			      struct xsk_ring_cons *rx,
			      struct xsk_ring_prod *tx,
			      struct xsk_ring_prod *fill,
			      struct xsk_ring_cons *comp,
			      const struct xsk_socket_config *usr_config)
{
	bool unmap, rx_setup_done = false, tx_setup_done = false;
	void *rx_map = NULL, *tx_map = NULL;
	struct sockaddr_xdp sxdp = {};
	struct xdp_mmap_offsets off;
	struct xsk_socket *xsk;
	struct xsk_ctx *ctx;
	int err;

	if (!umem || !xsk_ptr || !(rx || tx))
		return -EFAULT;

	unmap = umem->fill_save != fill;

	xsk = calloc(1, sizeof(*xsk));
	if (!xsk)
		return -ENOMEM;

	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
	if (err)
		goto out_xsk_alloc;

	if (umem->refcount++ > 0) {
		xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
		if (xsk->fd < 0) {
			err = -errno;
			goto out_xsk_alloc;
		}
	} else {
		xsk->fd = umem->fd;
		rx_setup_done = umem->rx_ring_setup_done;
		tx_setup_done = umem->tx_ring_setup_done;
	}

	ctx = xsk_get_ctx(umem, ifindex, queue_id);
	if (!ctx) {
		if (!fill || !comp) {
			err = -EFAULT;
			goto out_socket;
		}

		ctx = xsk_create_ctx(xsk, umem, ifindex, queue_id, fill, comp);
		if (!ctx) {
			err = -ENOMEM;
			goto out_socket;
		}
	}
	xsk->ctx = ctx;
	if (rx && !rx_setup_done) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
				 &xsk->config.rx_size,
				 sizeof(xsk->config.rx_size));
		if (err) {
			err = -errno;
			goto out_put_ctx;
		}
		if (xsk->fd == umem->fd)
			umem->rx_ring_setup_done = true;
	}
	if (tx && !tx_setup_done) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
				 &xsk->config.tx_size,
				 sizeof(xsk->config.tx_size));
		if (err) {
			err = -errno;
			goto out_put_ctx;
		}
		if (xsk->fd == umem->fd)
			umem->tx_ring_setup_done = true;
	}

	err = xsk_get_mmap_offsets(xsk->fd, &off);
	if (err) {
		err = -errno;
		goto out_put_ctx;
	}

	if (rx) {
		rx_map = mmap(NULL, off.rx.desc +
			      xsk->config.rx_size * sizeof(struct xdp_desc),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      xsk->fd, XDP_PGOFF_RX_RING);
		if (rx_map == MAP_FAILED) {
			err = -errno;
			goto out_put_ctx;
		}

		rx->mask = xsk->config.rx_size - 1;
		rx->size = xsk->config.rx_size;
		rx->producer = rx_map + off.rx.producer;
		rx->consumer = rx_map + off.rx.consumer;
		rx->flags = rx_map + off.rx.flags;
		rx->ring = rx_map + off.rx.desc;
		rx->cached_prod = *rx->producer;
		rx->cached_cons = *rx->consumer;
	}
	xsk->rx = rx;

	if (tx) {
		tx_map = mmap(NULL, off.tx.desc +
			      xsk->config.tx_size * sizeof(struct xdp_desc),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      xsk->fd, XDP_PGOFF_TX_RING);
		if (tx_map == MAP_FAILED) {
			err = -errno;
			goto out_mmap_rx;
		}

		tx->mask = xsk->config.tx_size - 1;
		tx->size = xsk->config.tx_size;
		tx->producer = tx_map + off.tx.producer;
		tx->consumer = tx_map + off.tx.consumer;
		tx->flags = tx_map + off.tx.flags;
		tx->ring = tx_map + off.tx.desc;
		tx->cached_prod = *tx->producer;
		/* cached_cons is r->size bigger than the real consumer pointer
		 * See xsk_prod_nb_free
		 */
		tx->cached_cons = *tx->consumer + xsk->config.tx_size;
	}
	xsk->tx = tx;
	sxdp.sxdp_family = PF_XDP;
	sxdp.sxdp_ifindex = ctx->ifindex;
	sxdp.sxdp_queue_id = ctx->queue_id;
	if (umem->refcount > 1) {
		sxdp.sxdp_flags |= XDP_SHARED_UMEM;
		sxdp.sxdp_shared_umem_fd = umem->fd;
	} else {
		sxdp.sxdp_flags = xsk->config.bind_flags;
	}

	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
	if (err) {
		err = -errno;
		goto out_mmap_tx;
	}

	*xsk_ptr = xsk;
	umem->fill_save = NULL;
	umem->comp_save = NULL;
	return 0;

out_mmap_tx:
	if (tx)
		munmap(tx_map, off.tx.desc +
		       xsk->config.tx_size * sizeof(struct xdp_desc));
out_mmap_rx:
	if (rx)
		munmap(rx_map, off.rx.desc +
		       xsk->config.rx_size * sizeof(struct xdp_desc));
out_put_ctx:
	xsk_put_ctx(ctx, unmap);
out_socket:
	if (--umem->refcount)
		close(xsk->fd);
out_xsk_alloc:
	free(xsk);
	return err;
}
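/* Non-shared variant: bind the single socket that owns the umem, reusing the
 * fill and completion rings saved at umem creation time.
 */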
int xsk_socket__create(struct xsk_socket **xsk_ptr, int ifindex,
		       __u32 queue_id, struct xsk_umem *umem,
		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
		       const struct xsk_socket_config *usr_config)
{
	if (!umem)
		return -EFAULT;

	return xsk_socket__create_shared(xsk_ptr, ifindex, queue_id, umem,
					 rx, tx, umem->fill_save,
					 umem->comp_save, usr_config);
}
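/* Typical call sequence (illustrative sketch only, not part of the library;
 * num_frames, frame_size, ifindex, queue_id and xskmap are placeholders and
 * error handling is omitted). The buffer passed to xsk_umem__create() should
 * be page aligned, e.g. obtained from mmap():
 *
 *	void *bufs = mmap(NULL, num_frames * frame_size,
 *			  PROT_READ | PROT_WRITE,
 *			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	struct xsk_ring_prod fill, tx;
 *	struct xsk_ring_cons comp, rx;
 *	struct xsk_umem *umem;
 *	struct xsk_socket *xsk;
 *
 *	xsk_umem__create(&umem, bufs, num_frames * frame_size,
 *			 &fill, &comp, NULL);
 *	xsk_socket__create(&xsk, ifindex, queue_id, umem, &rx, &tx, NULL);
 *	xsk_update_xskmap(xskmap, xsk);
 */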
int xsk_umem__delete(struct xsk_umem *umem)
{
	struct xdp_mmap_offsets off;
	int err;

	if (!umem)
		return 0;

	if (umem->refcount)
		return -EBUSY;

	err = xsk_get_mmap_offsets(umem->fd, &off);
	if (!err && umem->fill_save && umem->comp_save) {
		munmap(umem->fill_save->ring - off.fr.desc,
		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
		munmap(umem->comp_save->ring - off.cr.desc,
		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
	}

	close(umem->fd);
	free(umem);

	return 0;
}
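/* Tear down a socket: drop the ctx reference, unmap the RX/TX rings and
 * close the socket fd unless it is also the umem's fd.
 */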
void xsk_socket__delete(struct xsk_socket *xsk)
{
	size_t desc_sz = sizeof(struct xdp_desc);
	struct xdp_mmap_offsets off;
	struct xsk_umem *umem;
	struct xsk_ctx *ctx;
	int err;

	if (!xsk)
		return;

	ctx = xsk->ctx;
	umem = ctx->umem;

	xsk_put_ctx(ctx, true);

	err = xsk_get_mmap_offsets(xsk->fd, &off);
	if (!err) {
		if (xsk->rx)
			munmap(xsk->rx->ring - off.rx.desc,
			       off.rx.desc + xsk->config.rx_size * desc_sz);
		if (xsk->tx)
			munmap(xsk->tx->ring - off.tx.desc,
			       off.tx.desc + xsk->config.tx_size * desc_sz);
	}

	umem->refcount--;
	/* Do not close an fd that also has an associated umem connected
	 * to it.
	 */
	if (xsk->fd != umem->fd)
		close(xsk->fd);
	free(xsk);
}