drivers/net/ethernet/google/gve/gve_rx.c
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include <linux/etherdevice.h>

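/* Overview of the receive path implemented below (summary of this file, for
 * orientation): each RX ring pairs a descriptor ring written by the device
 * with a data ring of buffer slots posted by the driver. Buffers come from
 * one of two sources. In raw addressing mode each slot carries the DMA
 * address of a page the driver allocated itself; otherwise buffers live in a
 * pre-registered queue page list (QPL) and slots carry offsets into it. Every
 * page is split into two half-page packet buffers, and the driver "flips"
 * between the halves when the page refcount shows the stack has released the
 * other half.
 */
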
static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
{
        struct gve_notify_block *block =
                        &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)];

        block->rx = NULL;
}

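/* In raw addressing mode the data slot holds the buffer's DMA address and its
 * low bits track which half of the page is currently posted (see
 * gve_rx_flip_buff() below), so GVE_DATA_SLOT_ADDR_PAGE_MASK masks them off
 * to recover the page-aligned address before unmapping and freeing the page.
 */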
static void gve_rx_free_buffer(struct device *dev,
                               struct gve_rx_slot_page_info *page_info,
                               union gve_rx_data_slot *data_slot)
{
        dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
                                      GVE_DATA_SLOT_ADDR_PAGE_MASK);

        gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}

static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        if (rx->data.raw_addressing) {
                u32 slots = rx->mask + 1;
                int i;

                for (i = 0; i < slots; i++)
                        gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
                                           &rx->data.data_ring[i]);
        } else {
                gve_unassign_qpl(priv, rx->data.qpl->id);
                rx->data.qpl = NULL;
        }
        kvfree(rx->data.page_info);
        rx->data.page_info = NULL;
}

static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *dev = &priv->pdev->dev;
        u32 slots = rx->mask + 1;
        size_t bytes;

        gve_rx_remove_from_block(priv, idx);

        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
        rx->desc.desc_ring = NULL;

        dma_free_coherent(dev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;

        gve_rx_unfill_pages(priv, rx);

        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(dev, bytes, rx->data.data_ring,
                          rx->data.data_bus);
        rx->data.data_ring = NULL;
        netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
                                dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
        page_info->page = page;
        page_info->page_offset = 0;
        page_info->page_address = page_address(page);
        *slot_addr = cpu_to_be64(addr);
}

static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
                               struct gve_rx_slot_page_info *page_info,
                               union gve_rx_data_slot *data_slot)
{
        struct page *page;
        dma_addr_t dma;
        int err;

        err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE);
        if (err)
                return err;

        gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
        return 0;
}

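/* Buffer posting at ring-bringup time: one page is attached to every slot.
 * In QPL mode the pages already exist in the assigned queue page list and the
 * slot simply records that page's byte offset within the list; in raw
 * addressing mode a fresh page is allocated and DMA-mapped per slot.
 */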
static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
        struct gve_priv *priv = rx->gve;
        u32 slots;
        int err;
        int i;

        /* Allocate one page per Rx queue slot. Each page is split into two
         * packet buffers; when possible we "page flip" between the two.
         */
        slots = rx->mask + 1;

        rx->data.page_info = kvzalloc(slots *
                                      sizeof(*rx->data.page_info), GFP_KERNEL);
        if (!rx->data.page_info)
                return -ENOMEM;

        if (!rx->data.raw_addressing)
                rx->data.qpl = gve_assign_rx_qpl(priv);
        for (i = 0; i < slots; i++) {
                if (!rx->data.raw_addressing) {
                        struct page *page = rx->data.qpl->pages[i];
                        dma_addr_t addr = i * PAGE_SIZE;

                        gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
                                            &rx->data.data_ring[i].qpl_offset);
                        continue;
                }
                err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
                                          &rx->data.data_ring[i]);
                if (err)
                        goto alloc_err;
        }

        return slots;
alloc_err:
        while (i--)
                gve_rx_free_buffer(&priv->pdev->dev,
                                   &rx->data.page_info[i],
                                   &rx->data.data_ring[i]);
        return err;
}

static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
{
        u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx);
        struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
        struct gve_rx_ring *rx = &priv->rx[queue_idx];

        block->rx = rx;
        rx->ntfy_id = ntfy_idx;
}

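/* Ring allocation order: data slot ring, then the per-slot page buffers, then
 * the queue resources block, then the descriptor ring. The error labels below
 * unwind in exactly the reverse order, and gve_rx_free_ring() releases the
 * same objects for a ring that was fully set up.
 */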
static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *hdev = &priv->pdev->dev;
        u32 slots, npages;
        int filled_pages;
        size_t bytes;
        int err;

        netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
        /* Make sure everything is zeroed to start with */
        memset(rx, 0, sizeof(*rx));

        rx->gve = priv;
        rx->q_num = idx;

        slots = priv->rx_data_slot_cnt;
        rx->mask = slots - 1;
        rx->data.raw_addressing = priv->raw_addressing;

        /* alloc rx data ring */
        bytes = sizeof(*rx->data.data_ring) * slots;
        rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
                                                &rx->data.data_bus,
                                                GFP_KERNEL);
        if (!rx->data.data_ring)
                return -ENOMEM;
        filled_pages = gve_prefill_rx_pages(rx);
        if (filled_pages < 0) {
                err = -ENOMEM;
                goto abort_with_slots;
        }
        rx->fill_cnt = filled_pages;
        /* Ensure data ring slots (packet buffers) are visible. */
        dma_wmb();

        /* Alloc gve_queue_resources */
        rx->q_resources =
                dma_alloc_coherent(hdev,
                                   sizeof(*rx->q_resources),
                                   &rx->q_resources_bus,
                                   GFP_KERNEL);
        if (!rx->q_resources) {
                err = -ENOMEM;
                goto abort_filled;
        }
        netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
                  (unsigned long)rx->data.data_bus);

        /* alloc rx desc ring */
        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        npages = bytes / PAGE_SIZE;
        if (npages * PAGE_SIZE != bytes) {
                err = -EIO;
                goto abort_with_q_resources;
        }

        rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
                                                GFP_KERNEL);
        if (!rx->desc.desc_ring) {
                err = -ENOMEM;
                goto abort_with_q_resources;
        }
        rx->cnt = 0;
        rx->db_threshold = priv->rx_desc_cnt / 2;
        rx->desc.seqno = 1;
        gve_rx_add_to_block(priv, idx);

        return 0;

abort_with_q_resources:
        dma_free_coherent(hdev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;
abort_filled:
        gve_rx_unfill_pages(priv, rx);
abort_with_slots:
        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
        rx->data.data_ring = NULL;

        return err;
}

int gve_rx_alloc_rings(struct gve_priv *priv)
{
        int err = 0;
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++) {
                err = gve_rx_alloc_ring(priv, i);
                if (err) {
                        netif_err(priv, drv, priv->dev,
                                  "Failed to alloc rx ring=%d: err=%d\n",
                                  i, err);
                        break;
                }
        }
        /* Free any rings allocated before the error */
        if (err) {
                int j;

                for (j = 0; j < i; j++)
                        gve_rx_free_ring(priv, j);
        }
        return err;
}

void gve_rx_free_rings(struct gve_priv *priv)
{
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++)
                gve_rx_free_ring(priv, i);
}

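/* Ringing the doorbell publishes rx->fill_cnt (the running count of posted
 * buffers) to the device. The doorbell index comes from the queue resources
 * block shared with the device, and the count is written big-endian into the
 * doorbell BAR.
 */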
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

        iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
        if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
                return PKT_HASH_TYPE_L4;
        if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
                return PKT_HASH_TYPE_L3;
        return PKT_HASH_TYPE_L2;
}

static struct sk_buff *gve_rx_copy(struct net_device *dev,
                                   struct napi_struct *napi,
                                   struct gve_rx_slot_page_info *page_info,
                                   u16 len)
{
        struct sk_buff *skb = napi_alloc_skb(napi, len);
        void *va = page_info->page_address + GVE_RX_PAD +
                   (page_info->page_offset ? PAGE_SIZE / 2 : 0);

        if (unlikely(!skb))
                return NULL;

        __skb_put(skb, len);

        skb_copy_to_linear_data(skb, va, len);

        skb->protocol = eth_type_trans(skb, dev);

        return skb;
}

static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
                                        struct gve_rx_slot_page_info *page_info,
                                        u16 len)
{
        struct sk_buff *skb = napi_get_frags(napi);

        if (unlikely(!skb))
                return NULL;

        skb_add_rx_frag(skb, 0, page_info->page,
                        (page_info->page_offset ? PAGE_SIZE / 2 : 0) +
                        GVE_RX_PAD, len, PAGE_SIZE / 2);

        return skb;
}

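/* Page flipping: each page holds two half-page buffers, so switching between
 * them only takes two XORs. Illustrative values, assuming 4 KB pages (the only
 * case where flipping is enabled, see gve_rx_can_flip_buffers()): a slot
 * address of 0x1000 becomes 0x1800 after one flip and returns to 0x1000 after
 * the next, while page_offset toggles between 0 and 1.
 */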
static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
        const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);

        /* "flip" to other packet buffer on this page */
        page_info->page_offset ^= 0x1;
        *(slot_addr) ^= offset;
}

static bool gve_rx_can_flip_buffers(struct net_device *netdev)
{
        return PAGE_SIZE == 4096
                ? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false;
}

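/* Recycling decision: the driver takes an extra page reference each time it
 * attaches a half-page buffer to an SKB as a fragment, so page_count() == 1
 * means the driver is the only remaining owner and the page may be reposted.
 * A count of 2 or more means some SKB still holds it; anything below 1
 * indicates a reference counting bug and triggers the WARN below.
 */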
static int gve_rx_can_recycle_buffer(struct page *page)
{
        int pagecount = page_count(page);

        /* This page is not being used by any SKBs - reuse */
        if (pagecount == 1)
                return 1;
        /* This page is still being used by an SKB - we can't reuse */
        else if (pagecount >= 2)
                return 0;
        WARN(pagecount < 1, "Pagecount should never be < 1");
        return -1;
}

static struct sk_buff *
gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
                      struct gve_rx_slot_page_info *page_info, u16 len,
                      struct napi_struct *napi,
                      union gve_rx_data_slot *data_slot)
{
        struct sk_buff *skb;

        skb = gve_rx_add_frags(napi, page_info, len);
        if (!skb)
                return NULL;

        /* Optimistically stop the kernel from freeing the page by increasing
         * the page bias. We will check the refcount in refill to determine if
         * we need to alloc a new page.
         */
        get_page(page_info->page);

        return skb;
}

static struct sk_buff *
gve_rx_qpl(struct device *dev, struct net_device *netdev,
           struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
           u16 len, struct napi_struct *napi,
           union gve_rx_data_slot *data_slot)
{
        struct sk_buff *skb;

        /* if raw_addressing mode is not enabled gvnic can only receive into
         * registered segments. If the buffer can't be recycled, our only
         * choice is to copy the data out of it so that we can return it to the
         * device.
         */
        if (page_info->can_flip) {
                skb = gve_rx_add_frags(napi, page_info, len);
                /* No point in recycling if we didn't get the skb */
                if (skb) {
                        /* Make sure that the page isn't freed. */
                        get_page(page_info->page);
                        gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
                }
        } else {
                skb = gve_rx_copy(netdev, napi, page_info, len);
                if (skb) {
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_copied_pkt++;
                        u64_stats_update_end(&rx->statss);
                }
        }
        return skb;
}

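/* Per-descriptor receive. Small packets (up to priv->rx_copybreak bytes) are
 * always copied into a freshly allocated SKB so the buffer can be reposted at
 * once. Larger packets are attached as page fragments: in raw addressing mode
 * the page is simply ref-counted and handed up, while in QPL mode the buffer
 * is flipped to its other half when possible and copied otherwise, since QPL
 * pages must stay registered with the device.
 */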
static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
                   netdev_features_t feat, u32 idx)
{
        struct gve_rx_slot_page_info *page_info;
        struct gve_priv *priv = rx->gve;
        struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
        struct net_device *dev = priv->dev;
        union gve_rx_data_slot *data_slot;
        struct sk_buff *skb = NULL;
        dma_addr_t page_bus;
        u16 len;

        /* drop this packet */
        if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_desc_err_dropped_pkt++;
                u64_stats_update_end(&rx->statss);
                return false;
        }

        len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
        page_info = &rx->data.page_info[idx];

        data_slot = &rx->data.data_ring[idx];
        page_bus = (rx->data.raw_addressing) ?
                        be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK :
                        rx->data.qpl->page_buses[idx];
        dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
                                PAGE_SIZE, DMA_FROM_DEVICE);

        if (len <= priv->rx_copybreak) {
                /* Just copy small packets */
                skb = gve_rx_copy(dev, napi, page_info, len);
                u64_stats_update_begin(&rx->statss);
                rx->rx_copied_pkt++;
                rx->rx_copybreak_pkt++;
                u64_stats_update_end(&rx->statss);
        } else {
                u8 can_flip = gve_rx_can_flip_buffers(dev);
                int recycle = 0;

                if (can_flip) {
                        recycle = gve_rx_can_recycle_buffer(page_info->page);
                        if (recycle < 0) {
                                if (!rx->data.raw_addressing)
                                        gve_schedule_reset(priv);
                                return false;
                        }
                }

                page_info->can_flip = can_flip && recycle;
                if (rx->data.raw_addressing) {
                        skb = gve_rx_raw_addressing(&priv->pdev->dev, dev,
                                                    page_info, len, napi,
                                                    data_slot);
                } else {
                        skb = gve_rx_qpl(&priv->pdev->dev, dev, rx,
                                         page_info, len, napi, data_slot);
                }
        }

        if (!skb) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_skb_alloc_fail++;
                u64_stats_update_end(&rx->statss);
                return false;
        }

        if (likely(feat & NETIF_F_RXCSUM)) {
                /* NIC passes up the partial sum */
                if (rx_desc->csum)
                        skb->ip_summed = CHECKSUM_COMPLETE;
                else
                        skb->ip_summed = CHECKSUM_NONE;
                skb->csum = csum_unfold(rx_desc->csum);
        }

        /* parse flags & pass relevant info up */
        if (likely(feat & NETIF_F_RXHASH) &&
            gve_needs_rss(rx_desc->flags_seq))
                skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
                             gve_rss_type(rx_desc->flags_seq));

        if (skb_is_nonlinear(skb))
                napi_gro_frags(napi);
        else
                napi_gro_receive(napi, skb);
        return true;
}

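/* Descriptor ownership is tracked with the sequence number embedded in
 * flags_seq: a descriptor is ready for the driver when GVE_SEQNO() of its
 * flags matches rx->desc.seqno, which the driver advances with
 * gve_next_seqno() as descriptors are consumed in gve_clean_rx_done().
 */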
static bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
        struct gve_rx_desc *desc;
        __be16 flags_seq;
        u32 next_idx;

        next_idx = rx->cnt & rx->mask;
        desc = rx->desc.desc_ring + next_idx;

        flags_seq = desc->flags_seq;
        /* Make sure we have synchronized the seq no with the device */
        smp_rmb();

        return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        int refill_target = rx->mask + 1;
        u32 fill_cnt = rx->fill_cnt;

        while (fill_cnt - rx->cnt < refill_target) {
                struct gve_rx_slot_page_info *page_info;
                u32 idx = fill_cnt & rx->mask;

                page_info = &rx->data.page_info[idx];
                if (page_info->can_flip) {
                        /* The other half of the page is free because it was
                         * free when we processed the descriptor. Flip to it.
                         */
                        union gve_rx_data_slot *data_slot =
                                                &rx->data.data_ring[idx];

                        gve_rx_flip_buff(page_info, &data_slot->addr);
                        page_info->can_flip = 0;
                } else {
                        /* It is possible that the networking stack has already
                         * finished processing all outstanding packets in the buffer
                         * and it can be reused.
                         * Flipping is unnecessary here - if the networking stack still
                         * owns half the page it is impossible to tell which half. Either
                         * the whole page is free or it needs to be replaced.
                         */
                        int recycle = gve_rx_can_recycle_buffer(page_info->page);

                        if (recycle < 0) {
                                if (!rx->data.raw_addressing)
                                        gve_schedule_reset(priv);
                                return false;
                        }
                        if (!recycle) {
                                /* We can't reuse the buffer - alloc a new one */
                                union gve_rx_data_slot *data_slot =
                                                &rx->data.data_ring[idx];
                                struct device *dev = &priv->pdev->dev;

                                gve_rx_free_buffer(dev, page_info, data_slot);
                                page_info->page = NULL;
                                if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot))
                                        break;
                        }
                }
                fill_cnt++;
        }
        rx->fill_cnt = fill_cnt;
        return true;
}

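/* Main cleaning loop, called from NAPI context. It consumes ready descriptors
 * up to the budget, then restocks buffers: QPL rings refill implicitly as
 * descriptors are processed, while raw addressing rings are only refilled once
 * the count of posted buffers drops to the doorbell threshold. The return
 * value tells the caller whether more work is pending and the queue should be
 * polled again.
 */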
bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
                       netdev_features_t feat)
{
        struct gve_priv *priv = rx->gve;
        u32 work_done = 0, packets = 0;
        struct gve_rx_desc *desc;
        u32 cnt = rx->cnt;
        u32 idx = cnt & rx->mask;
        u64 bytes = 0;

        desc = rx->desc.desc_ring + idx;
        while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
               work_done < budget) {
                bool dropped;

                netif_info(priv, rx_status, priv->dev,
                           "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
                           rx->q_num, idx, desc, desc->flags_seq);
                netif_info(priv, rx_status, priv->dev,
                           "[%d] seqno=%d rx->desc.seqno=%d\n",
                           rx->q_num, GVE_SEQNO(desc->flags_seq),
                           rx->desc.seqno);
                dropped = !gve_rx(rx, desc, feat, idx);
                if (!dropped) {
                        bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
                        packets++;
                }
                cnt++;
                idx = cnt & rx->mask;
                desc = rx->desc.desc_ring + idx;
                rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
                work_done++;
        }

        if (!work_done && rx->fill_cnt - cnt > rx->db_threshold)
                return false;

        u64_stats_update_begin(&rx->statss);
        rx->rpackets += packets;
        rx->rbytes += bytes;
        u64_stats_update_end(&rx->statss);
        rx->cnt = cnt;

        /* restock ring slots */
        if (!rx->data.raw_addressing) {
                /* In QPL mode buffs are refilled as the desc are processed */
                rx->fill_cnt += work_done;
        } else if (rx->fill_cnt - cnt <= rx->db_threshold) {
                /* In raw addressing mode buffs are only refilled if the avail
                 * falls below a threshold.
                 */
                if (!gve_rx_refill_buffers(priv, rx))
                        return false;

                /* If we were not able to completely refill buffers, we'll want
                 * to schedule this queue for work again to refill buffers.
                 */
                if (rx->fill_cnt - cnt <= rx->db_threshold) {
                        gve_rx_write_doorbell(priv, rx);
                        return true;
                }
        }

        gve_rx_write_doorbell(priv, rx);
        return gve_rx_work_pending(rx);
}

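/* NAPI entry point for an RX ring's notification block. A budget of zero is
 * treated as "do all available work" (it is promoted to INT_MAX before the
 * clean loop runs). The returned flag is true when the queue still has work
 * outstanding and should be polled again.
 */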
bool gve_rx_poll(struct gve_notify_block *block, int budget)
{
        struct gve_rx_ring *rx = block->rx;
        netdev_features_t feat;
        bool repoll = false;

        feat = block->napi.dev->features;

        /* If budget is 0, do all the work */
        if (budget == 0)
                budget = INT_MAX;

        if (budget > 0)
                repoll |= gve_clean_rx_done(rx, budget, feat);
        else
                repoll |= gve_rx_work_pending(rx);
        return repoll;
}