1 | // SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) |
2 | ||
3 | #include <linux/bpf_trace.h> | |
4 | #include <linux/dma-mapping.h> | |
5 | #include <linux/etherdevice.h> | |
6 | #include <linux/filter.h> | |
7 | #include <linux/irq.h> | |
8 | #include <linux/pci.h> | |
9 | #include <linux/skbuff.h> | |
10 | #include "funeth_txrx.h" | |
11 | #include "funeth.h" | |
12 | #include "fun_queue.h" | |
13 | ||
14 | #define CREATE_TRACE_POINTS | |
15 | #include "funeth_trace.h" | |
16 | ||
17 | /* Given the device's max supported MTU and pages of at least 4KB, a packet can |
18 | * be scattered into at most 4 buffers. | |
19 | */ | |
20 | #define RX_MAX_FRAGS 4 | |
21 | ||
22 | /* Per packet headroom in non-XDP mode. Present only for 1-frag packets. */ | |
23 | #define FUN_RX_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN) | |
24 | ||
25 | /* We try to reuse pages for our buffers. To avoid frequent page ref writes we | |
26 | * take EXTRA_PAGE_REFS references at once and then hand them out one per packet | |
27 | * occupying the buffer. | |
28 | */ | |
29 | #define EXTRA_PAGE_REFS 1000000 | |
30 | #define MIN_PAGE_REFS 1000 | |
31 | ||
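| /* Flags accumulated in q->xdp_flush while servicing a CQ; they defer the XDP Tx doorbell and the redirect flush to the end of the NAPI poll (see fun_process_cqes()). */ |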
32 | enum { | |
33 | FUN_XDP_FLUSH_REDIR = 1, | |
34 | FUN_XDP_FLUSH_TX = 2, | |
35 | }; | |
36 | ||
37 | /* See if a page is running low on the refs we are holding and, if so, take more. */ |
38 | static void refresh_refs(struct funeth_rxbuf *buf) | |
39 | { | |
40 | if (unlikely(buf->pg_refs < MIN_PAGE_REFS)) { | |
41 | buf->pg_refs += EXTRA_PAGE_REFS; | |
42 | page_ref_add(buf->page, EXTRA_PAGE_REFS); | |
43 | } | |
44 | } | |
45 | ||
46 | /* Offer a buffer to the Rx buffer cache. The cache will hold the buffer if |
47 | * there is room for it and its page is local to this NUMA node. Otherwise |
48 | * the page is unmapped and our references released. |
49 | */ | |
50 | static void cache_offer(struct funeth_rxq *q, const struct funeth_rxbuf *buf) | |
51 | { | |
52 | struct funeth_rx_cache *c = &q->cache; | |
53 | ||
54 | if (c->prod_cnt - c->cons_cnt <= c->mask && buf->node == numa_mem_id()) { | |
55 | c->bufs[c->prod_cnt & c->mask] = *buf; | |
56 | c->prod_cnt++; | |
57 | } else { | |
58 | dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE, | |
59 | DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); | |
60 | __page_frag_cache_drain(buf->page, buf->pg_refs); | |
61 | } | |
62 | } | |
63 | ||
64 | /* Get a page from the Rx buffer cache. We only consider the next available | |
65 | * page and return it if we own all its references. | |
66 | */ | |
67 | static bool cache_get(struct funeth_rxq *q, struct funeth_rxbuf *rb) | |
68 | { | |
69 | struct funeth_rx_cache *c = &q->cache; | |
70 | struct funeth_rxbuf *buf; | |
71 | ||
72 | if (c->prod_cnt == c->cons_cnt) | |
73 | return false; /* empty cache */ | |
74 | ||
75 | buf = &c->bufs[c->cons_cnt & c->mask]; | |
76 | if (page_ref_count(buf->page) == buf->pg_refs) { | |
77 | dma_sync_single_for_device(q->dma_dev, buf->dma_addr, | |
78 | PAGE_SIZE, DMA_FROM_DEVICE); | |
79 | *rb = *buf; | |
80 | buf->page = NULL; | |
81 | refresh_refs(rb); | |
82 | c->cons_cnt++; | |
83 | return true; | |
84 | } | |
85 | ||
86 | /* Page can't be reused. If the cache is full drop this page. */ | |
87 | if (c->prod_cnt - c->cons_cnt > c->mask) { | |
88 | dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE, | |
89 | DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); | |
90 | __page_frag_cache_drain(buf->page, buf->pg_refs); | |
91 | buf->page = NULL; | |
92 | c->cons_cnt++; | |
93 | } | |
94 | return false; | |
95 | } | |
96 | ||
97 | /* Allocate and DMA-map a page for receive. */ | |
98 | static int funeth_alloc_page(struct funeth_rxq *q, struct funeth_rxbuf *rb, | |
99 | int node, gfp_t gfp) | |
100 | { | |
101 | struct page *p; | |
102 | ||
103 | if (cache_get(q, rb)) | |
104 | return 0; | |
105 | ||
106 | p = __alloc_pages_node(node, gfp | __GFP_NOWARN, 0); | |
107 | if (unlikely(!p)) | |
108 | return -ENOMEM; | |
109 | ||
110 | rb->dma_addr = dma_map_page(q->dma_dev, p, 0, PAGE_SIZE, | |
111 | DMA_FROM_DEVICE); | |
112 | if (unlikely(dma_mapping_error(q->dma_dev, rb->dma_addr))) { | |
113 | FUN_QSTAT_INC(q, rx_map_err); | |
114 | __free_page(p); | |
115 | return -ENOMEM; | |
116 | } | |
117 | ||
118 | FUN_QSTAT_INC(q, rx_page_alloc); | |
119 | ||
120 | rb->page = p; | |
121 | rb->pg_refs = 1; | |
122 | refresh_refs(rb); | |
123 | rb->node = page_is_pfmemalloc(p) ? -1 : page_to_nid(p); | |
124 | return 0; | |
125 | } | |
126 | ||
127 | static void funeth_free_page(struct funeth_rxq *q, struct funeth_rxbuf *rb) | |
128 | { | |
129 | if (rb->page) { | |
130 | dma_unmap_page(q->dma_dev, rb->dma_addr, PAGE_SIZE, | |
131 | DMA_FROM_DEVICE); | |
132 | __page_frag_cache_drain(rb->page, rb->pg_refs); | |
133 | rb->page = NULL; | |
134 | } | |
135 | } | |
136 | ||
137 | /* Run the XDP program assigned to an Rx queue. | |
138 | * Return %NULL if the buffer is consumed, or the virtual address of the packet | |
139 | * to turn into an skb. | |
140 | */ | |
141 | static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va, | |
142 | int ref_ok, struct funeth_txq *xdp_q) | |
143 | { | |
144 | struct bpf_prog *xdp_prog; | |
145 | struct xdp_frame *xdpf; |
146 | struct xdp_buff xdp; |
147 | u32 act; | |
148 | ||
149 | /* VA includes the headroom, frag size includes headroom + tailroom */ | |
150 | xdp_init_buff(&xdp, ALIGN(skb_frag_size(frags), FUN_EPRQ_PKT_ALIGN), | |
151 | &q->xdp_rxq); | |
152 | xdp_prepare_buff(&xdp, buf_va, FUN_XDP_HEADROOM, skb_frag_size(frags) - | |
153 | (FUN_RX_TAILROOM + FUN_XDP_HEADROOM), false); | |
154 | ||
155 | xdp_prog = READ_ONCE(q->xdp_prog); | |
156 | act = bpf_prog_run_xdp(xdp_prog, &xdp); | |
157 | ||
158 | switch (act) { | |
159 | case XDP_PASS: | |
160 | /* remove headroom, which may not be FUN_XDP_HEADROOM now */ | |
161 | skb_frag_size_set(frags, xdp.data_end - xdp.data); | |
162 | skb_frag_off_add(frags, xdp.data - xdp.data_hard_start); | |
163 | goto pass; | |
164 | case XDP_TX: | |
165 | if (unlikely(!ref_ok)) | |
166 | goto pass; | |
167 | ||
168 | xdpf = xdp_convert_buff_to_frame(&xdp); |
169 | if (!xdpf || !fun_xdp_tx(xdp_q, xdpf)) |
170 | goto xdp_error; |
171 | FUN_QSTAT_INC(q, xdp_tx); | |
172 | q->xdp_flush |= FUN_XDP_FLUSH_TX; | |
173 | break; | |
174 | case XDP_REDIRECT: | |
175 | if (unlikely(!ref_ok)) | |
176 | goto pass; | |
177 | if (unlikely(xdp_do_redirect(q->netdev, &xdp, xdp_prog))) | |
178 | goto xdp_error; | |
179 | FUN_QSTAT_INC(q, xdp_redir); | |
180 | q->xdp_flush |= FUN_XDP_FLUSH_REDIR; | |
181 | break; | |
182 | default: | |
183 | bpf_warn_invalid_xdp_action(q->netdev, xdp_prog, act); | |
184 | fallthrough; | |
185 | case XDP_ABORTED: | |
186 | trace_xdp_exception(q->netdev, xdp_prog, act); | |
187 | xdp_error: | |
188 | q->cur_buf->pg_refs++; /* return frags' page reference */ | |
189 | FUN_QSTAT_INC(q, xdp_err); | |
190 | break; | |
191 | case XDP_DROP: | |
192 | q->cur_buf->pg_refs++; | |
193 | FUN_QSTAT_INC(q, xdp_drops); | |
194 | break; | |
195 | } | |
196 | return NULL; | |
197 | ||
198 | pass: | |
199 | return xdp.data; | |
200 | } | |
201 | ||
202 | /* A CQE contains a fixed completion structure along with optional metadata and | |
203 | * even packet data. Given the start address of a CQE return the start of the | |
204 | * contained fixed structure, which lies at the end. | |
205 | */ | |
206 | static const void *cqe_to_info(const void *cqe) | |
207 | { | |
208 | return cqe + FUNETH_CQE_INFO_OFFSET; | |
209 | } | |
210 | ||
211 | /* The inverse of cqe_to_info(). */ | |
212 | static const void *info_to_cqe(const void *cqe_info) | |
213 | { | |
214 | return cqe_info - FUNETH_CQE_INFO_OFFSET; | |
215 | } | |
216 | ||
217 | /* Return the type of hash provided by the device based on the L3 and L4 | |
218 | * protocols it parsed for the packet. | |
219 | */ | |
220 | static enum pkt_hash_types cqe_to_pkt_hash_type(u16 pkt_parse) | |
221 | { | |
222 | static const enum pkt_hash_types htype_map[] = { | |
223 | PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3, | |
224 | PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L4, | |
225 | PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3, | |
226 | PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3 | |
227 | }; | |
228 | u16 key; | |
229 | ||
230 | /* Build the key from the TCP/UDP and IP/IPv6 bits */ | |
231 | key = ((pkt_parse >> FUN_ETH_RX_CV_OL4_PROT_S) & 6) | | |
232 | ((pkt_parse >> (FUN_ETH_RX_CV_OL3_PROT_S + 1)) & 1); | |
233 | ||
234 | return htype_map[key]; | |
235 | } | |
236 | ||
237 | /* Each received packet can be scattered across several Rx buffers or can | |
238 | * share a buffer with previously received packets depending on the buffer | |
239 | * and packet sizes and the room available in the most recently used buffer. | |
240 | * | |
241 | * The rules are: | |
242 | * - If the buffer at the head of an RQ has not been used it gets (part of) the | |
243 | * next incoming packet. | |
244 | * - Otherwise, if the packet fully fits in the buffer's remaining space the | |
245 | * packet is written there. | |
246 | * - Otherwise, the packet goes into the next Rx buffer. | |
247 | * | |
248 | * This function returns the Rx buffer for a packet or fragment thereof of the | |
249 | * given length. If it isn't @buf it either recycles or frees that buffer | |
250 | * before advancing the queue to the next buffer. | |
251 | * | |
252 | * If called repeatedly with the remaining length of a packet it will walk | |
253 | * through all the buffers containing the packet. | |
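| * E.g., with 4KB pages, a packet with 2000B left at buf_offset 3072 moves to the next buffer (3072 + 2000 > 4096), while at offset 0 or 2000 it stays in @buf. |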
254 | */ | |
255 | static struct funeth_rxbuf * | |
256 | get_buf(struct funeth_rxq *q, struct funeth_rxbuf *buf, unsigned int len) | |
257 | { | |
258 | if (q->buf_offset + len <= PAGE_SIZE || !q->buf_offset) | |
259 | return buf; /* @buf holds (part of) the packet */ | |
260 | ||
261 | /* The packet occupies part of the next buffer. Move there after | |
262 | * replenishing the current buffer slot either with the spare page or | |
263 | * by reusing the slot's existing page. Note that if a spare page isn't | |
264 | * available and the current packet occupies @buf, it is a multi-frag |
265 | * packet that will be dropped, leaving @buf available for reuse. |
266 | */ | |
267 | if ((page_ref_count(buf->page) == buf->pg_refs && | |
268 | buf->node == numa_mem_id()) || !q->spare_buf.page) { | |
269 | dma_sync_single_for_device(q->dma_dev, buf->dma_addr, | |
270 | PAGE_SIZE, DMA_FROM_DEVICE); | |
271 | refresh_refs(buf); | |
272 | } else { | |
273 | cache_offer(q, buf); | |
274 | *buf = q->spare_buf; | |
275 | q->spare_buf.page = NULL; | |
276 | q->rqes[q->rq_cons & q->rq_mask] = | |
277 | FUN_EPRQ_RQBUF_INIT(buf->dma_addr); | |
278 | } | |
279 | q->buf_offset = 0; | |
280 | q->rq_cons++; | |
281 | return &q->bufs[q->rq_cons & q->rq_mask]; | |
282 | } | |
283 | ||
284 | /* Gather the page fragments making up the first Rx packet on @q. Its total | |
285 | * length @tot_len includes optional head- and tail-rooms. | |
286 | * | |
287 | * Return 0 if the device retains ownership of at least some of the pages. | |
288 | * In this case the caller may only copy the packet. | |
289 | * | |
290 | * A non-zero return value gives the caller permission to use references to the | |
291 | * pages, e.g., attach them to skbs. Additionally, if the value is <0 at least | |
292 | * one of the pages is PF_MEMALLOC. | |
293 | * | |
294 | * Regardless of outcome the caller is granted a reference to each of the pages. | |
295 | */ | |
296 | static int fun_gather_pkt(struct funeth_rxq *q, unsigned int tot_len, | |
297 | skb_frag_t *frags) | |
298 | { | |
299 | struct funeth_rxbuf *buf = q->cur_buf; | |
300 | unsigned int frag_len; | |
301 | int ref_ok = 1; | |
302 | ||
303 | for (;;) { | |
304 | buf = get_buf(q, buf, tot_len); | |
305 | ||
306 | /* We always keep the RQ full of buffers so before we can give | |
307 | * one of our pages to the stack we require that we can obtain | |
308 | * a replacement page. If we can't, the packet will either be |
309 | * copied or dropped so we can retain ownership of the page and | |
310 | * reuse it. | |
311 | */ | |
312 | if (!q->spare_buf.page && | |
313 | funeth_alloc_page(q, &q->spare_buf, numa_mem_id(), | |
314 | GFP_ATOMIC | __GFP_MEMALLOC)) | |
315 | ref_ok = 0; | |
316 | ||
317 | frag_len = min_t(unsigned int, tot_len, | |
318 | PAGE_SIZE - q->buf_offset); | |
319 | dma_sync_single_for_cpu(q->dma_dev, | |
320 | buf->dma_addr + q->buf_offset, | |
321 | frag_len, DMA_FROM_DEVICE); | |
322 | buf->pg_refs--; | |
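| /* a pfmemalloc page records node as -1 (see funeth_alloc_page()), which makes ref_ok negative below */ |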
323 | if (ref_ok) | |
324 | ref_ok |= buf->node; | |
325 | ||
326 | __skb_frag_set_page(frags, buf->page); | |
327 | skb_frag_off_set(frags, q->buf_offset); | |
328 | skb_frag_size_set(frags++, frag_len); | |
329 | ||
330 | tot_len -= frag_len; | |
331 | if (!tot_len) | |
332 | break; | |
333 | ||
334 | q->buf_offset = PAGE_SIZE; | |
335 | } | |
336 | q->buf_offset = ALIGN(q->buf_offset + frag_len, FUN_EPRQ_PKT_ALIGN); | |
337 | q->cur_buf = buf; | |
338 | return ref_ok; | |
339 | } | |
340 | ||
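| /* True when the interface is configured to timestamp all received packets. */ |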
341 | static bool rx_hwtstamp_enabled(const struct net_device *dev) | |
342 | { | |
343 | const struct funeth_priv *d = netdev_priv(dev); | |
344 | ||
345 | return d->hwtstamp_cfg.rx_filter == HWTSTAMP_FILTER_ALL; | |
346 | } | |
347 | ||
348 | /* Advance the CQ pointers and phase tag to the next CQE. */ | |
349 | static void advance_cq(struct funeth_rxq *q) | |
350 | { | |
351 | if (unlikely(q->cq_head == q->cq_mask)) { | |
352 | q->cq_head = 0; | |
353 | q->phase ^= 1; | |
354 | q->next_cqe_info = cqe_to_info(q->cqes); | |
355 | } else { | |
356 | q->cq_head++; | |
357 | q->next_cqe_info += FUNETH_CQE_SIZE; | |
358 | } | |
359 | prefetch(q->next_cqe_info); | |
360 | } | |
361 | ||
362 | /* Process the packet represented by the head CQE of @q. Gather the packet's | |
363 | * fragments, run it through the optional XDP program, and if needed construct | |
364 | * an skb and pass it to the stack. | |
365 | */ | |
366 | static void fun_handle_cqe_pkt(struct funeth_rxq *q, struct funeth_txq *xdp_q) | |
367 | { | |
368 | const struct fun_eth_cqe *rxreq = info_to_cqe(q->next_cqe_info); | |
369 | unsigned int i, tot_len, pkt_len = be32_to_cpu(rxreq->pkt_len); | |
370 | struct net_device *ndev = q->netdev; | |
371 | skb_frag_t frags[RX_MAX_FRAGS]; | |
372 | struct skb_shared_info *si; | |
373 | unsigned int headroom; | |
374 | gro_result_t gro_res; | |
375 | struct sk_buff *skb; | |
376 | int ref_ok; | |
377 | void *va; | |
378 | u16 cv; | |
379 | ||
380 | u64_stats_update_begin(&q->syncp); | |
381 | q->stats.rx_pkts++; | |
382 | q->stats.rx_bytes += pkt_len; | |
383 | u64_stats_update_end(&q->syncp); | |
384 | ||
385 | advance_cq(q); | |
386 | ||
387 | /* account for head- and tail-room, present only for 1-buffer packets */ | |
388 | tot_len = pkt_len; | |
389 | headroom = be16_to_cpu(rxreq->headroom); | |
390 | if (likely(headroom)) | |
391 | tot_len += FUN_RX_TAILROOM + headroom; | |
392 | ||
393 | ref_ok = fun_gather_pkt(q, tot_len, frags); | |
394 | va = skb_frag_address(frags); | |
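| /* Run the XDP program only for packets delivered with the XDP headroom; anything else, e.g. a multi-buffer packet, which has no headroom, goes straight to the stack. */ |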
395 | if (xdp_q && headroom == FUN_XDP_HEADROOM) { | |
396 | va = fun_run_xdp(q, frags, va, ref_ok, xdp_q); | |
397 | if (!va) | |
398 | return; | |
399 | headroom = 0; /* XDP_PASS trims it */ | |
400 | } | |
401 | if (unlikely(!ref_ok)) | |
402 | goto no_mem; | |
403 | ||
404 | if (likely(headroom)) { | |
405 | /* headroom is either FUN_RX_HEADROOM or FUN_XDP_HEADROOM */ | |
406 | prefetch(va + headroom); | |
407 | skb = napi_build_skb(va, ALIGN(tot_len, FUN_EPRQ_PKT_ALIGN)); | |
408 | if (unlikely(!skb)) | |
409 | goto no_mem; | |
410 | ||
411 | skb_reserve(skb, headroom); | |
412 | __skb_put(skb, pkt_len); | |
413 | skb->protocol = eth_type_trans(skb, ndev); | |
414 | } else { | |
415 | prefetch(va); | |
416 | skb = napi_get_frags(q->napi); | |
417 | if (unlikely(!skb)) | |
418 | goto no_mem; | |
419 | ||
420 | if (ref_ok < 0) | |
421 | skb->pfmemalloc = 1; | |
422 | ||
423 | si = skb_shinfo(skb); | |
424 | si->nr_frags = rxreq->nsgl; | |
425 | for (i = 0; i < si->nr_frags; i++) | |
426 | si->frags[i] = frags[i]; | |
427 | ||
428 | skb->len = pkt_len; | |
429 | skb->data_len = pkt_len; | |
430 | skb->truesize += round_up(pkt_len, FUN_EPRQ_PKT_ALIGN); | |
431 | } | |
432 | ||
433 | skb_record_rx_queue(skb, q->qidx); | |
434 | cv = be16_to_cpu(rxreq->pkt_cv); | |
435 | if (likely((q->netdev->features & NETIF_F_RXHASH) && rxreq->hash)) | |
436 | skb_set_hash(skb, be32_to_cpu(rxreq->hash), | |
437 | cqe_to_pkt_hash_type(cv)); | |
438 | if (likely((q->netdev->features & NETIF_F_RXCSUM) && rxreq->csum)) { | |
439 | FUN_QSTAT_INC(q, rx_cso); | |
440 | skb->ip_summed = CHECKSUM_UNNECESSARY; | |
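| /* per skb->csum_level semantics, the CQE's csum field counts the checksums the device validated */ |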
441 | skb->csum_level = be16_to_cpu(rxreq->csum) - 1; | |
442 | } | |
443 | if (unlikely(rx_hwtstamp_enabled(q->netdev))) | |
444 | skb_hwtstamps(skb)->hwtstamp = be64_to_cpu(rxreq->timestamp); | |
445 | ||
446 | trace_funeth_rx(q, rxreq->nsgl, pkt_len, skb->hash, cv); | |
447 | ||
448 | gro_res = skb->data_len ? napi_gro_frags(q->napi) : | |
449 | napi_gro_receive(q->napi, skb); | |
450 | if (gro_res == GRO_MERGED || gro_res == GRO_MERGED_FREE) | |
451 | FUN_QSTAT_INC(q, gro_merged); | |
452 | else if (gro_res == GRO_HELD) | |
453 | FUN_QSTAT_INC(q, gro_pkts); | |
454 | return; | |
455 | ||
456 | no_mem: | |
457 | FUN_QSTAT_INC(q, rx_mem_drops); | |
458 | ||
459 | /* Release the references we've been granted for the frag pages. | |
460 | * We return the ref of the last frag and free the rest. | |
461 | */ | |
462 | q->cur_buf->pg_refs++; | |
463 | for (i = 0; i < rxreq->nsgl - 1; i++) | |
464 | __free_page(skb_frag_page(frags + i)); | |
465 | } | |
466 | ||
467 | /* Return 0 if the phase tag of the CQE at the CQ's head matches expectations, |
468 | * indicating the CQE is new. | |
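| * The driver's phase starts at 1 and flips each time the CQ head wraps (see advance_cq()), so it tracks the tag expected from the device on its current pass through the ring. |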
469 | */ | |
470 | static u16 cqe_phase_mismatch(const struct fun_cqe_info *ci, u16 phase) | |
471 | { | |
472 | u16 sf_p = be16_to_cpu(ci->sf_p); | |
473 | ||
474 | return (sf_p & 1) ^ phase; | |
475 | } | |
476 | ||
477 | /* Walk through a CQ identifying and processing fresh CQEs up to the given | |
478 | * budget. Return the remaining budget. | |
479 | */ | |
480 | static int fun_process_cqes(struct funeth_rxq *q, int budget) | |
481 | { | |
482 | struct funeth_priv *fp = netdev_priv(q->netdev); | |
483 | struct funeth_txq **xdpqs, *xdp_q = NULL; | |
484 | ||
485 | xdpqs = rcu_dereference_bh(fp->xdpqs); | |
486 | if (xdpqs) | |
487 | xdp_q = xdpqs[smp_processor_id()]; | |
488 | ||
489 | while (budget && !cqe_phase_mismatch(q->next_cqe_info, q->phase)) { | |
490 | /* access other descriptor fields after the phase check */ | |
491 | dma_rmb(); | |
492 | ||
493 | fun_handle_cqe_pkt(q, xdp_q); | |
494 | budget--; | |
495 | } | |
496 | ||
497 | if (unlikely(q->xdp_flush)) { | |
498 | if (q->xdp_flush & FUN_XDP_FLUSH_TX) | |
499 | fun_txq_wr_db(xdp_q); | |
500 | if (q->xdp_flush & FUN_XDP_FLUSH_REDIR) | |
501 | xdp_do_flush(); | |
502 | q->xdp_flush = 0; | |
503 | } | |
504 | ||
505 | return budget; | |
506 | } | |
507 | ||
508 | /* NAPI handler for Rx queues. Calls the CQE processing loop and writes RQ/CQ | |
509 | * doorbells as needed. | |
510 | */ | |
511 | int fun_rxq_napi_poll(struct napi_struct *napi, int budget) | |
512 | { | |
513 | struct fun_irq *irq = container_of(napi, struct fun_irq, napi); | |
514 | struct funeth_rxq *q = irq->rxq; | |
515 | int work_done = budget - fun_process_cqes(q, budget); | |
516 | u32 cq_db_val = q->cq_head; | |
517 | ||
518 | if (unlikely(work_done >= budget)) | |
519 | FUN_QSTAT_INC(q, rx_budget); | |
520 | else if (napi_complete_done(napi, work_done)) | |
521 | cq_db_val |= q->irq_db_val; | |
522 | ||
523 | /* check whether to post new Rx buffers */ | |
524 | if (q->rq_cons - q->rq_cons_db >= q->rq_db_thres) { | |
525 | u64_stats_update_begin(&q->syncp); | |
526 | q->stats.rx_bufs += q->rq_cons - q->rq_cons_db; | |
527 | u64_stats_update_end(&q->syncp); | |
528 | q->rq_cons_db = q->rq_cons; | |
529 | writel((q->rq_cons - 1) & q->rq_mask, q->rq_db); | |
530 | } | |
531 | ||
532 | writel(cq_db_val, q->cq_db); | |
533 | return work_done; | |
534 | } | |
535 | ||
536 | /* Free the Rx buffers of an Rx queue. */ | |
537 | static void fun_rxq_free_bufs(struct funeth_rxq *q) | |
538 | { | |
539 | struct funeth_rxbuf *b = q->bufs; | |
540 | unsigned int i; | |
541 | ||
542 | for (i = 0; i <= q->rq_mask; i++, b++) | |
543 | funeth_free_page(q, b); | |
544 | ||
545 | funeth_free_page(q, &q->spare_buf); | |
546 | q->cur_buf = NULL; | |
547 | } | |
548 | ||
549 | /* Initially provision an Rx queue with Rx buffers. */ | |
550 | static int fun_rxq_alloc_bufs(struct funeth_rxq *q, int node) | |
551 | { | |
552 | struct funeth_rxbuf *b = q->bufs; | |
553 | unsigned int i; | |
554 | ||
555 | for (i = 0; i <= q->rq_mask; i++, b++) { | |
556 | if (funeth_alloc_page(q, b, node, GFP_KERNEL)) { | |
557 | fun_rxq_free_bufs(q); | |
558 | return -ENOMEM; | |
559 | } | |
560 | q->rqes[i] = FUN_EPRQ_RQBUF_INIT(b->dma_addr); | |
561 | } | |
562 | q->cur_buf = q->bufs; | |
563 | return 0; | |
564 | } | |
565 | ||
566 | /* Initialize a used-buffer cache of the given depth. */ | |
567 | static int fun_rxq_init_cache(struct funeth_rx_cache *c, unsigned int depth, | |
568 | int node) | |
569 | { | |
570 | c->mask = depth - 1; | |
571 | c->bufs = kvzalloc_node(depth * sizeof(*c->bufs), GFP_KERNEL, node); | |
572 | return c->bufs ? 0 : -ENOMEM; | |
573 | } | |
574 | ||
575 | /* Deallocate an Rx queue's used-buffer cache and its contents. */ | |
576 | static void fun_rxq_free_cache(struct funeth_rxq *q) | |
577 | { | |
578 | struct funeth_rxbuf *b = q->cache.bufs; | |
579 | unsigned int i; | |
580 | ||
581 | for (i = 0; i <= q->cache.mask; i++, b++) | |
582 | funeth_free_page(q, b); | |
583 | ||
584 | kvfree(q->cache.bufs); | |
585 | q->cache.bufs = NULL; | |
586 | } | |
587 | ||
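| /* Install or remove an Rx queue's XDP program. If the per-packet headroom the device must leave changes, the queue's EPCQ is modified first. */ |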
588 | int fun_rxq_set_bpf(struct funeth_rxq *q, struct bpf_prog *prog) | |
589 | { | |
590 | struct funeth_priv *fp = netdev_priv(q->netdev); | |
591 | struct fun_admin_epcq_req cmd; | |
592 | u16 headroom; | |
593 | int err; | |
594 | ||
595 | headroom = prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM; | |
596 | if (headroom != q->headroom) { | |
597 | cmd.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ, | |
598 | sizeof(cmd)); | |
599 | cmd.u.modify = | |
600 | FUN_ADMIN_EPCQ_MODIFY_REQ_INIT(FUN_ADMIN_SUBOP_MODIFY, | |
601 | 0, q->hw_cqid, headroom); | |
602 | err = fun_submit_admin_sync_cmd(fp->fdev, &cmd.common, NULL, 0, | |
603 | 0); | |
604 | if (err) | |
605 | return err; | |
606 | q->headroom = headroom; | |
607 | } | |
608 | ||
609 | WRITE_ONCE(q->xdp_prog, prog); | |
610 | return 0; | |
611 | } | |
612 | ||
613 | /* Create an Rx queue, allocating the host memory it needs. */ | |
614 | static struct funeth_rxq *fun_rxq_create_sw(struct net_device *dev, | |
615 | unsigned int qidx, | |
616 | unsigned int ncqe, | |
617 | unsigned int nrqe, | |
618 | struct fun_irq *irq) | |
619 | { | |
620 | struct funeth_priv *fp = netdev_priv(dev); | |
621 | struct funeth_rxq *q; | |
622 | int err = -ENOMEM; | |
623 | int numa_node; | |
624 | ||
625 | numa_node = fun_irq_node(irq); | |
626 | q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node); | |
627 | if (!q) | |
628 | goto err; | |
629 | ||
630 | q->qidx = qidx; | |
631 | q->netdev = dev; | |
632 | q->cq_mask = ncqe - 1; | |
633 | q->rq_mask = nrqe - 1; | |
634 | q->numa_node = numa_node; | |
635 | q->rq_db_thres = nrqe / 4; | |
636 | u64_stats_init(&q->syncp); | |
637 | q->dma_dev = &fp->pdev->dev; | |
638 | ||
639 | q->rqes = fun_alloc_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes), | |
640 | sizeof(*q->bufs), false, numa_node, | |
641 | &q->rq_dma_addr, (void **)&q->bufs, NULL); | |
642 | if (!q->rqes) | |
643 | goto free_q; | |
644 | ||
645 | q->cqes = fun_alloc_ring_mem(q->dma_dev, ncqe, FUNETH_CQE_SIZE, 0, | |
646 | false, numa_node, &q->cq_dma_addr, NULL, | |
647 | NULL); | |
648 | if (!q->cqes) | |
649 | goto free_rqes; | |
650 | ||
651 | err = fun_rxq_init_cache(&q->cache, nrqe, numa_node); | |
652 | if (err) | |
653 | goto free_cqes; | |
654 | ||
655 | err = fun_rxq_alloc_bufs(q, numa_node); | |
656 | if (err) | |
657 | goto free_cache; | |
658 | ||
659 | q->stats.rx_bufs = q->rq_mask; | |
660 | q->init_state = FUN_QSTATE_INIT_SW; | |
661 | return q; | |
662 | ||
663 | free_cache: | |
664 | fun_rxq_free_cache(q); | |
665 | free_cqes: | |
666 | dma_free_coherent(q->dma_dev, ncqe * FUNETH_CQE_SIZE, q->cqes, | |
667 | q->cq_dma_addr); | |
668 | free_rqes: | |
669 | fun_free_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes), false, q->rqes, | |
670 | q->rq_dma_addr, q->bufs); | |
671 | free_q: | |
672 | kfree(q); | |
673 | err: | |
674 | netdev_err(dev, "Unable to allocate memory for Rx queue %u\n", qidx); | |
675 | return ERR_PTR(err); | |
676 | } | |
677 | ||
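| /* Free an Rx queue's host resources; the inverse of fun_rxq_create_sw(). The queue's packet counters are folded into the netdev totals before it goes away. */ |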
678 | static void fun_rxq_free_sw(struct funeth_rxq *q) | |
679 | { | |
680 | struct funeth_priv *fp = netdev_priv(q->netdev); | |
681 | ||
682 | fun_rxq_free_cache(q); | |
683 | fun_rxq_free_bufs(q); | |
684 | fun_free_ring_mem(q->dma_dev, q->rq_mask + 1, sizeof(*q->rqes), false, | |
685 | q->rqes, q->rq_dma_addr, q->bufs); | |
686 | dma_free_coherent(q->dma_dev, (q->cq_mask + 1) * FUNETH_CQE_SIZE, | |
687 | q->cqes, q->cq_dma_addr); | |
688 | ||
689 | /* Before freeing the queue transfer key counters to the device. */ | |
690 | fp->rx_packets += q->stats.rx_pkts; | |
691 | fp->rx_bytes += q->stats.rx_bytes; | |
692 | fp->rx_dropped += q->stats.rx_map_err + q->stats.rx_mem_drops; | |
693 | ||
694 | kfree(q); | |
695 | } | |
696 | ||
697 | /* Create an Rx queue's resources on the device. */ | |
698 | int fun_rxq_create_dev(struct funeth_rxq *q, struct fun_irq *irq) | |
699 | { | |
700 | struct funeth_priv *fp = netdev_priv(q->netdev); | |
701 | unsigned int ncqe = q->cq_mask + 1; | |
702 | unsigned int nrqe = q->rq_mask + 1; | |
703 | int err; | |
704 | ||
705 | err = xdp_rxq_info_reg(&q->xdp_rxq, q->netdev, q->qidx, | |
706 | irq->napi.napi_id); | |
707 | if (err) | |
708 | goto out; | |
709 | ||
710 | err = xdp_rxq_info_reg_mem_model(&q->xdp_rxq, MEM_TYPE_PAGE_SHARED, | |
711 | NULL); | |
712 | if (err) | |
713 | goto xdp_unreg; | |
714 | ||
715 | q->phase = 1; | |
716 | q->irq_cnt = 0; | |
717 | q->cq_head = 0; | |
718 | q->rq_cons = 0; | |
719 | q->rq_cons_db = 0; | |
720 | q->buf_offset = 0; | |
721 | q->napi = &irq->napi; | |
722 | q->irq_db_val = fp->cq_irq_db; | |
723 | q->next_cqe_info = cqe_to_info(q->cqes); | |
724 | ||
725 | q->xdp_prog = fp->xdp_prog; | |
726 | q->headroom = fp->xdp_prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM; | |
727 | ||
728 | err = fun_sq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR | | |
729 | FUN_ADMIN_EPSQ_CREATE_FLAG_RQ, 0, | |
730 | FUN_HCI_ID_INVALID, 0, nrqe, q->rq_dma_addr, 0, 0, | |
731 | 0, 0, fp->fdev->kern_end_qid, PAGE_SHIFT, | |
732 | &q->hw_sqid, &q->rq_db); | |
733 | if (err) | |
734 | goto xdp_unreg; | |
735 | ||
736 | err = fun_cq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR | | |
737 | FUN_ADMIN_EPCQ_CREATE_FLAG_RQ, 0, | |
738 | q->hw_sqid, ilog2(FUNETH_CQE_SIZE), ncqe, | |
739 | q->cq_dma_addr, q->headroom, FUN_RX_TAILROOM, 0, 0, | |
740 | irq->irq_idx, 0, fp->fdev->kern_end_qid, | |
741 | &q->hw_cqid, &q->cq_db); | |
742 | if (err) | |
743 | goto free_rq; | |
744 | ||
745 | irq->rxq = q; | |
746 | writel(q->rq_mask, q->rq_db); | |
747 | q->init_state = FUN_QSTATE_INIT_FULL; | |
748 | ||
749 | netif_info(fp, ifup, q->netdev, | |
750 | "Rx queue %u, depth %u/%u, HW qid %u/%u, IRQ idx %u, node %d, headroom %u\n", | |
751 | q->qidx, ncqe, nrqe, q->hw_cqid, q->hw_sqid, irq->irq_idx, | |
752 | q->numa_node, q->headroom); | |
753 | return 0; | |
754 | ||
755 | free_rq: | |
756 | fun_destroy_sq(fp->fdev, q->hw_sqid); | |
757 | xdp_unreg: | |
758 | xdp_rxq_info_unreg(&q->xdp_rxq); | |
759 | out: | |
760 | netdev_err(q->netdev, | |
761 | "Failed to create Rx queue %u on device, error %d\n", | |
762 | q->qidx, err); | |
763 | return err; | |
764 | } | |
765 | ||
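| /* Tear down an Rx queue's device resources; the inverse of fun_rxq_create_dev(). A no-op unless the queue reached FUN_QSTATE_INIT_FULL. */ |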
766 | static void fun_rxq_free_dev(struct funeth_rxq *q) | |
767 | { | |
768 | struct funeth_priv *fp = netdev_priv(q->netdev); | |
769 | struct fun_irq *irq; | |
770 | ||
771 | if (q->init_state < FUN_QSTATE_INIT_FULL) | |
772 | return; | |
773 | ||
774 | irq = container_of(q->napi, struct fun_irq, napi); | |
775 | netif_info(fp, ifdown, q->netdev, | |
776 | "Freeing Rx queue %u (id %u/%u), IRQ %u\n", | |
777 | q->qidx, q->hw_cqid, q->hw_sqid, irq->irq_idx); | |
778 | ||
779 | irq->rxq = NULL; | |
780 | xdp_rxq_info_unreg(&q->xdp_rxq); | |
781 | fun_destroy_sq(fp->fdev, q->hw_sqid); | |
782 | fun_destroy_cq(fp->fdev, q->hw_cqid); | |
783 | q->init_state = FUN_QSTATE_INIT_SW; | |
784 | } | |
785 | ||
786 | /* Create or advance an Rx queue, allocating all the host and device resources | |
787 | * needed to reach the target state. | |
788 | */ | |
789 | int funeth_rxq_create(struct net_device *dev, unsigned int qidx, | |
790 | unsigned int ncqe, unsigned int nrqe, struct fun_irq *irq, | |
791 | int state, struct funeth_rxq **qp) | |
792 | { | |
793 | struct funeth_rxq *q = *qp; | |
794 | int err; | |
795 | ||
796 | if (!q) { | |
797 | q = fun_rxq_create_sw(dev, qidx, ncqe, nrqe, irq); | |
798 | if (IS_ERR(q)) | |
799 | return PTR_ERR(q); | |
800 | } | |
801 | ||
802 | if (q->init_state >= state) | |
803 | goto out; | |
804 | ||
805 | err = fun_rxq_create_dev(q, irq); | |
806 | if (err) { | |
807 | if (!*qp) | |
808 | fun_rxq_free_sw(q); | |
809 | return err; | |
810 | } | |
811 | ||
812 | out: | |
813 | *qp = q; | |
814 | return 0; | |
815 | } | |
816 | ||
817 | /* Free Rx queue resources until it reaches the target state. */ | |
818 | struct funeth_rxq *funeth_rxq_free(struct funeth_rxq *q, int state) | |
819 | { | |
820 | if (state < FUN_QSTATE_INIT_FULL) | |
821 | fun_rxq_free_dev(q); | |
822 | ||
823 | if (state == FUN_QSTATE_DESTROYED) { | |
824 | fun_rxq_free_sw(q); | |
825 | q = NULL; | |
826 | } | |
827 | ||
828 | return q; | |
829 | } |