linux.git: drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c (Linux 6.14-rc3)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (C) 2023 Intel Corporation */
3
4 #include <net/libeth/rx.h>
5 #include <net/libeth/tx.h>
6
7 #include "idpf.h"
8
9 /**
10  * idpf_tx_singleq_csum - Enable tx checksum offloads
11  * @skb: pointer to skb
12  * @off: pointer to struct that holds offload parameters
13  *
14  * Returns 1 if checksum offload was configured, 0 if no offload is needed
15  * (or the checksum was computed in software), negative on failure.
16  */
17 static int idpf_tx_singleq_csum(struct sk_buff *skb,
18                                 struct idpf_tx_offload_params *off)
19 {
20         u32 l4_len, l3_len, l2_len;
21         union {
22                 struct iphdr *v4;
23                 struct ipv6hdr *v6;
24                 unsigned char *hdr;
25         } ip;
26         union {
27                 struct tcphdr *tcp;
28                 unsigned char *hdr;
29         } l4;
30         u32 offset, cmd = 0;
31         u8 l4_proto = 0;
32         __be16 frag_off;
33         bool is_tso;
34
35         if (skb->ip_summed != CHECKSUM_PARTIAL)
36                 return 0;
37
38         ip.hdr = skb_network_header(skb);
39         l4.hdr = skb_transport_header(skb);
40
41         /* compute outer L2 header size */
42         l2_len = ip.hdr - skb->data;
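            /* the MACLEN descriptor field is expressed in 2-byte words, hence
             * the division by 2 below (0x3F is the 6-bit field mask)
             */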
43         offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2);
44         is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO);
45         if (skb->encapsulation) {
46                 u32 tunnel = 0;
47
48                 /* define outer network header type */
49                 if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
50                         /* The stack computes the IP header already, the only
51                          * time we need the hardware to recompute it is in the
52                          * case of TSO.
53                          */
54                         tunnel |= is_tso ?
55                                   IDPF_TX_CTX_EXT_IP_IPV4 :
56                                   IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM;
57
58                         l4_proto = ip.v4->protocol;
59                 } else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
60                         tunnel |= IDPF_TX_CTX_EXT_IP_IPV6;
61
62                         l4_proto = ip.v6->nexthdr;
63                         if (ipv6_ext_hdr(l4_proto))
64                                 ipv6_skip_exthdr(skb, skb_network_offset(skb) +
65                                                  sizeof(*ip.v6),
66                                                  &l4_proto, &frag_off);
67                 }
68
69                 /* define outer transport */
70                 switch (l4_proto) {
71                 case IPPROTO_UDP:
72                         tunnel |= IDPF_TXD_CTX_UDP_TUNNELING;
73                         break;
74                 case IPPROTO_GRE:
75                         tunnel |= IDPF_TXD_CTX_GRE_TUNNELING;
76                         break;
77                 case IPPROTO_IPIP:
78                 case IPPROTO_IPV6:
79                         l4.hdr = skb_inner_network_header(skb);
80                         break;
81                 default:
82                         if (is_tso)
83                                 return -1;
84
85                         skb_checksum_help(skb);
86
87                         return 0;
88                 }
89                 off->tx_flags |= IDPF_TX_FLAGS_TUNNEL;
90
91                 /* compute outer L3 header size */
92                 tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M,
93                                      (l4.hdr - ip.hdr) / 4);
94
95                 /* switch IP header pointer from outer to inner header */
96                 ip.hdr = skb_inner_network_header(skb);
97
98                 /* compute tunnel header size */
99                 tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M,
100                                      (ip.hdr - l4.hdr) / 2);
101
102                 /* indicate if we need to offload outer UDP header */
103                 if (is_tso &&
104                     !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
105                     (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
106                         tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M;
107
108                 /* record tunnel offload values */
109                 off->cd_tunneling |= tunnel;
110
111                 /* switch L4 header pointer from outer to inner */
112                 l4.hdr = skb_inner_transport_header(skb);
113                 l4_proto = 0;
114
115                 /* reset type as we transition from outer to inner headers */
116                 off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6);
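                    /* ip.v4 and ip.v6 alias the same inner header bytes, so
                     * the 4-bit version field can be read through either view
                     */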
117                 if (ip.v4->version == 4)
118                         off->tx_flags |= IDPF_TX_FLAGS_IPV4;
119                 if (ip.v6->version == 6)
120                         off->tx_flags |= IDPF_TX_FLAGS_IPV6;
121         }
122
123         /* Enable IP checksum offloads */
124         if (off->tx_flags & IDPF_TX_FLAGS_IPV4) {
125                 l4_proto = ip.v4->protocol;
126                 /* See comment above regarding need for HW to recompute IP
127                  * header checksum in the case of TSO.
128                  */
129                 if (is_tso)
130                         cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM;
131                 else
132                         cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4;
133
134         } else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) {
135                 cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6;
136                 l4_proto = ip.v6->nexthdr;
137                 if (ipv6_ext_hdr(l4_proto))
138                         ipv6_skip_exthdr(skb, skb_network_offset(skb) +
139                                          sizeof(*ip.v6), &l4_proto,
140                                          &frag_off);
141         } else {
142                 return -1;
143         }
144
145         /* compute inner L3 header size */
146         l3_len = l4.hdr - ip.hdr;
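            /* IPLEN (and the L4 lengths below) are programmed in 4-byte words */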
147         offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S;
148
149         /* Enable L4 checksum offloads */
150         switch (l4_proto) {
151         case IPPROTO_TCP:
152                 /* enable checksum offloads */
153                 cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP;
154                 l4_len = l4.tcp->doff;
155                 break;
156         case IPPROTO_UDP:
157                 /* enable UDP checksum offload */
158                 cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP;
159                 l4_len = sizeof(struct udphdr) >> 2;
160                 break;
161         case IPPROTO_SCTP:
162                 /* enable SCTP checksum offload */
163                 cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP;
164                 l4_len = sizeof(struct sctphdr) >> 2;
165                 break;
166         default:
167                 if (is_tso)
168                         return -1;
169
170                 skb_checksum_help(skb);
171
172                 return 0;
173         }
174
175         offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S;
176         off->td_cmd |= cmd;
177         off->hdr_offsets |= offset;
178
179         return 1;
180 }
181
182 /**
183  * idpf_tx_singleq_map - Build the Tx base descriptor
184  * @tx_q: queue to send buffer on
185  * @first: first Tx buffer info struct to use
186  * @offloads: pointer to struct that holds offload parameters
187  *
188  * This function loops over the skb data pointed to by *first
189  * and gets a physical address for each memory location and programs
190  * it and the length into the transmit base mode descriptor.
191  */
192 static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q,
193                                 struct idpf_tx_buf *first,
194                                 struct idpf_tx_offload_params *offloads)
195 {
196         u32 offsets = offloads->hdr_offsets;
197         struct idpf_tx_buf *tx_buf = first;
198         struct idpf_base_tx_desc *tx_desc;
199         struct sk_buff *skb = first->skb;
200         u64 td_cmd = offloads->td_cmd;
201         unsigned int data_len, size;
202         u16 i = tx_q->next_to_use;
203         struct netdev_queue *nq;
204         skb_frag_t *frag;
205         dma_addr_t dma;
206         u64 td_tag = 0;
207
208         data_len = skb->data_len;
209         size = skb_headlen(skb);
210
211         tx_desc = &tx_q->base_tx[i];
212
213         dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);
214
215         /* write each descriptor with CRC bit */
216         if (idpf_queue_has(CRC_EN, tx_q))
217                 td_cmd |= IDPF_TX_DESC_CMD_ICRC;
218
219         for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
220                 unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
221
222                 if (dma_mapping_error(tx_q->dev, dma))
223                         return idpf_tx_dma_map_error(tx_q, skb, first, i);
224
225                 /* record length, and DMA address */
226                 dma_unmap_len_set(tx_buf, len, size);
227                 dma_unmap_addr_set(tx_buf, dma, dma);
228                 tx_buf->type = LIBETH_SQE_FRAG;
229
230                 /* align the first chunk to end on a read request boundary */
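                    /* (-dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1)) is the distance
                     * from dma up to the next read-request-size boundary
                     */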
231                 max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1);
232                 tx_desc->buf_addr = cpu_to_le64(dma);
233
234                 /* account for data chunks larger than the hardware
235                  * can handle
236                  */
237                 while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) {
238                         tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd,
239                                                                   offsets,
240                                                                   max_data,
241                                                                   td_tag);
242                         if (unlikely(++i == tx_q->desc_count)) {
243                                 tx_buf = &tx_q->tx_buf[0];
244                                 tx_desc = &tx_q->base_tx[0];
245                                 i = 0;
246                         } else {
247                                 tx_buf++;
248                                 tx_desc++;
249                         }
250
251                         tx_buf->type = LIBETH_SQE_EMPTY;
252
253                         dma += max_data;
254                         size -= max_data;
255
256                         max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED;
257                         tx_desc->buf_addr = cpu_to_le64(dma);
258                 }
259
260                 if (!data_len)
261                         break;
262
263                 tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
264                                                           size, td_tag);
265
266                 if (unlikely(++i == tx_q->desc_count)) {
267                         tx_buf = &tx_q->tx_buf[0];
268                         tx_desc = &tx_q->base_tx[0];
269                         i = 0;
270                 } else {
271                         tx_buf++;
272                         tx_desc++;
273                 }
274
275                 size = skb_frag_size(frag);
276                 data_len -= size;
277
278                 dma = skb_frag_dma_map(tx_q->dev, frag, 0, size,
279                                        DMA_TO_DEVICE);
280         }
281
282         skb_tx_timestamp(first->skb);
283
284         /* write last descriptor with RS and EOP bits */
285         td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS);
286
287         tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets,
288                                                   size, td_tag);
289
290         first->type = LIBETH_SQE_SKB;
291         first->rs_idx = i;
292
293         IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i);
294
295         nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
296         netdev_tx_sent_queue(nq, first->bytes);
297
298         idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
299 }
300
301 /**
302  * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring
303  * @txq: queue to put context descriptor on
304  *
305  * Since the Tx buffer ring mimics the descriptor ring, update the Tx buffer
306  * ring entry to reflect that this index holds a context descriptor.
307  */
308 static struct idpf_base_tx_ctx_desc *
309 idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq)
310 {
311         struct idpf_base_tx_ctx_desc *ctx_desc;
312         int ntu = txq->next_to_use;
313
314         txq->tx_buf[ntu].type = LIBETH_SQE_CTX;
315
316         ctx_desc = &txq->base_ctx[ntu];
317
318         IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu);
319         txq->next_to_use = ntu;
320
321         return ctx_desc;
322 }
323
324 /**
325  * idpf_tx_singleq_build_ctx_desc - populate context descriptor
326  * @txq: queue to send buffer on
327  * @offload: offload parameter structure
328  **/
329 static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq,
330                                            struct idpf_tx_offload_params *offload)
331 {
332         struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq);
333         u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX;
334
335         if (offload->tso_segs) {
336                 qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S;
337                 qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_TSO_LEN_M,
338                                   offload->tso_len);
339                 qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss);
340
341                 u64_stats_update_begin(&txq->stats_sync);
342                 u64_stats_inc(&txq->q_stats.lso_pkts);
343                 u64_stats_update_end(&txq->stats_sync);
344         }
345
346         desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling);
347
348         desc->qw0.l2tag2 = 0;
349         desc->qw0.rsvd1 = 0;
350         desc->qw1 = cpu_to_le64(qw1);
351 }
352
353 /**
354  * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors
355  * @skb: send buffer
356  * @tx_q: queue to send buffer on
357  *
358  * Returns NETDEV_TX_OK if sent, else an error code
359  */
360 netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
361                                   struct idpf_tx_queue *tx_q)
362 {
363         struct idpf_tx_offload_params offload = { };
364         struct idpf_tx_buf *first;
365         unsigned int count;
366         __be16 protocol;
367         int csum, tso;
368
369         count = idpf_tx_desc_count_required(tx_q, skb);
370         if (unlikely(!count))
371                 return idpf_tx_drop_skb(tx_q, skb);
372
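            /* make sure there is room for the data descriptors plus a context
             * descriptor and an extra cache line worth of descriptors before
             * committing the frame
             */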
373         if (idpf_tx_maybe_stop_common(tx_q,
374                                       count + IDPF_TX_DESCS_PER_CACHE_LINE +
375                                       IDPF_TX_DESCS_FOR_CTX)) {
376                 idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
377
378                 u64_stats_update_begin(&tx_q->stats_sync);
379                 u64_stats_inc(&tx_q->q_stats.q_busy);
380                 u64_stats_update_end(&tx_q->stats_sync);
381
382                 return NETDEV_TX_BUSY;
383         }
384
385         protocol = vlan_get_protocol(skb);
386         if (protocol == htons(ETH_P_IP))
387                 offload.tx_flags |= IDPF_TX_FLAGS_IPV4;
388         else if (protocol == htons(ETH_P_IPV6))
389                 offload.tx_flags |= IDPF_TX_FLAGS_IPV6;
390
391         tso = idpf_tso(skb, &offload);
392         if (tso < 0)
393                 goto out_drop;
394
395         csum = idpf_tx_singleq_csum(skb, &offload);
396         if (csum < 0)
397                 goto out_drop;
398
399         if (tso || offload.cd_tunneling)
400                 idpf_tx_singleq_build_ctx_desc(tx_q, &offload);
401
402         /* record the location of the first descriptor for this packet */
403         first = &tx_q->tx_buf[tx_q->next_to_use];
404         first->skb = skb;
405
406         if (tso) {
407                 first->packets = offload.tso_segs;
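                    /* count the headers replicated in every segment after the
                     * first towards the bytes reported to BQL and the stats
                     */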
408                 first->bytes = skb->len + ((first->packets - 1) * offload.tso_hdr_len);
409         } else {
410                 first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
411                 first->packets = 1;
412         }
413         idpf_tx_singleq_map(tx_q, first, &offload);
414
415         return NETDEV_TX_OK;
416
417 out_drop:
418         return idpf_tx_drop_skb(tx_q, skb);
419 }
420
421 /**
422  * idpf_tx_singleq_clean - Reclaim resources from queue
423  * @tx_q: Tx queue to clean
424  * @napi_budget: Used to determine if we are in netpoll
425  * @cleaned: returns number of packets cleaned
426  * Returns true if there is clean budget remaining, false otherwise.
427  */
428 static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget,
429                                   int *cleaned)
430 {
431         struct libeth_sq_napi_stats ss = { };
432         struct idpf_base_tx_desc *tx_desc;
433         u32 budget = tx_q->clean_budget;
434         s16 ntc = tx_q->next_to_clean;
435         struct libeth_cq_pp cp = {
436                 .dev    = tx_q->dev,
437                 .ss     = &ss,
438                 .napi   = napi_budget,
439         };
440         struct idpf_netdev_priv *np;
441         struct idpf_tx_buf *tx_buf;
442         struct netdev_queue *nq;
443         bool dont_wake;
444
445         tx_desc = &tx_q->base_tx[ntc];
446         tx_buf = &tx_q->tx_buf[ntc];
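            /* keep ntc negative (offset by -desc_count) so the wrap checks in
             * the loop below only need to test for it reaching zero
             */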
447         ntc -= tx_q->desc_count;
448
449         do {
450                 struct idpf_base_tx_desc *eop_desc;
451
452                 /* If this entry in the ring was used as a context descriptor,
453          * its corresponding entry in the buffer ring will indicate as
454          * much. We can skip this descriptor since there is no buffer
455                  * to clean.
456                  */
457                 if (unlikely(tx_buf->type <= LIBETH_SQE_CTX)) {
458                         tx_buf->type = LIBETH_SQE_EMPTY;
459                         goto fetch_next_txq_desc;
460                 }
461
462                 if (unlikely(tx_buf->type != LIBETH_SQE_SKB))
463                         break;
464
465                 /* prevent any other reads prior to type */
466                 smp_rmb();
467
468                 eop_desc = &tx_q->base_tx[tx_buf->rs_idx];
469
470                 /* if the descriptor isn't done, no work yet to do */
471                 if (!(eop_desc->qw1 &
472                       cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE)))
473                         break;
474
475                 /* update the statistics for this packet */
476                 libeth_tx_complete(tx_buf, &cp);
477
478                 /* unmap remaining buffers */
479                 while (tx_desc != eop_desc) {
480                         tx_buf++;
481                         tx_desc++;
482                         ntc++;
483                         if (unlikely(!ntc)) {
484                                 ntc -= tx_q->desc_count;
485                                 tx_buf = tx_q->tx_buf;
486                                 tx_desc = &tx_q->base_tx[0];
487                         }
488
489                         /* unmap any remaining paged data */
490                         libeth_tx_complete(tx_buf, &cp);
491                 }
492
493                 /* update budget only if we did something */
494                 budget--;
495
496 fetch_next_txq_desc:
497                 tx_buf++;
498                 tx_desc++;
499                 ntc++;
500                 if (unlikely(!ntc)) {
501                         ntc -= tx_q->desc_count;
502                         tx_buf = tx_q->tx_buf;
503                         tx_desc = &tx_q->base_tx[0];
504                 }
505         } while (likely(budget));
506
507         ntc += tx_q->desc_count;
508         tx_q->next_to_clean = ntc;
509
510         *cleaned += ss.packets;
511
512         u64_stats_update_begin(&tx_q->stats_sync);
513         u64_stats_add(&tx_q->q_stats.packets, ss.packets);
514         u64_stats_add(&tx_q->q_stats.bytes, ss.bytes);
515         u64_stats_update_end(&tx_q->stats_sync);
516
517         np = netdev_priv(tx_q->netdev);
518         nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
519
520         dont_wake = np->state != __IDPF_VPORT_UP ||
521                     !netif_carrier_ok(tx_q->netdev);
522         __netif_txq_completed_wake(nq, ss.packets, ss.bytes,
523                                    IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
524                                    dont_wake);
525
526         return !!budget;
527 }
528
529 /**
530  * idpf_tx_singleq_clean_all - Clean all Tx queues
531  * @q_vec: queue vector
532  * @budget: Used to determine if we are in netpoll
533  * @cleaned: returns number of packets cleaned
534  *
535  * Returns false if clean is not complete else returns true
536  */
537 static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
538                                       int *cleaned)
539 {
540         u16 num_txq = q_vec->num_txq;
541         bool clean_complete = true;
542         int i, budget_per_q;
543
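            /* split the NAPI budget evenly across the Tx queues, but never let
             * a queue's share drop below 1
             */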
544         budget_per_q = num_txq ? max(budget / num_txq, 1) : 0;
545         for (i = 0; i < num_txq; i++) {
546                 struct idpf_tx_queue *q;
547
548                 q = q_vec->tx[i];
549                 clean_complete &= idpf_tx_singleq_clean(q, budget_per_q,
550                                                         cleaned);
551         }
552
553         return clean_complete;
554 }
555
556 /**
557  * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor
558  * status and error fields
559  * @rx_desc: pointer to receive descriptor (in le64 format)
560  * @stat_err_bits: value to mask
561  *
562  * This function does some fast chicanery in order to return the
563  * value of the mask which is really only used for boolean tests.
564  * The status_error_ptype_len doesn't need to be shifted because it begins
565  * at offset zero.
566  */
567 static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc,
568                                          const u64 stat_err_bits)
569 {
570         return !!(rx_desc->base_wb.qword1.status_error_ptype_len &
571                   cpu_to_le64(stat_err_bits));
572 }
573
574 /**
575  * idpf_rx_singleq_is_non_eop - check whether the current buffer is non-EOP
576  * @rx_desc: Rx descriptor for current buffer
577  */
578 static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc)
579 {
580         /* if we are the last buffer then there is nothing else to do */
581         if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ)))
582                 return false;
583
584         return true;
585 }
586
587 /**
588  * idpf_rx_singleq_csum - Indicate in skb if checksum is good
589  * @rxq: Rx ring being processed
590  * @skb: skb currently being received and modified
591  * @csum_bits: checksum bits from descriptor
592  * @decoded: the packet type decoded by hardware
593  *
594  * skb->protocol must be set before this function is called
595  */
596 static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq,
597                                  struct sk_buff *skb,
598                                  struct idpf_rx_csum_decoded csum_bits,
599                                  struct libeth_rx_pt decoded)
600 {
601         bool ipv4, ipv6;
602
603         /* check if Rx checksum is enabled */
604         if (!libeth_rx_pt_has_checksum(rxq->netdev, decoded))
605                 return;
606
607         /* check if HW has decoded the packet and checksum */
608         if (unlikely(!csum_bits.l3l4p))
609                 return;
610
611         ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4;
612         ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6;
613
614         /* Check if there were any checksum errors */
615         if (unlikely(ipv4 && (csum_bits.ipe || csum_bits.eipe)))
616                 goto checksum_fail;
617
618         /* Device could not do any checksum offload for certain extension
619          * headers as indicated by setting IPV6EXADD bit
620          */
621         if (unlikely(ipv6 && csum_bits.ipv6exadd))
622                 return;
623
624         /* check for L4 errors and handle packets that were not able to be
625          * checksummed due to arrival speed
626          */
627         if (unlikely(csum_bits.l4e))
628                 goto checksum_fail;
629
630         if (unlikely(csum_bits.nat && csum_bits.eudpe))
631                 goto checksum_fail;
632
633         /* Handle packets that were not able to be checksummed due to arrival
634          * speed, in this case the stack can compute the csum.
635          */
636         if (unlikely(csum_bits.pprs))
637                 return;
638
639         /* If there is an outer header present that might contain a checksum
640          * we need to bump the checksum level by 1 to reflect the fact that
641          * we are indicating we validated the inner checksum.
642          */
643         if (decoded.tunnel_type >= LIBETH_RX_PT_TUNNEL_IP_GRENAT)
644                 skb->csum_level = 1;
645
646         skb->ip_summed = CHECKSUM_UNNECESSARY;
647         return;
648
649 checksum_fail:
650         u64_stats_update_begin(&rxq->stats_sync);
651         u64_stats_inc(&rxq->q_stats.hw_csum_err);
652         u64_stats_update_end(&rxq->stats_sync);
653 }
654
655 /**
656  * idpf_rx_singleq_base_csum - Indicate in skb if hw indicated a good cksum
657  * @rx_desc: the receive descriptor
658  *
659  * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
660  * descriptor writeback format.
661  *
662  * Return: parsed checksum status.
663  **/
664 static struct idpf_rx_csum_decoded
665 idpf_rx_singleq_base_csum(const union virtchnl2_rx_desc *rx_desc)
666 {
667         struct idpf_rx_csum_decoded csum_bits = { };
668         u32 rx_error, rx_status;
669         u64 qword;
670
671         qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
672
673         rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword);
674         rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword);
675
676         csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error);
677         csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M,
678                                    rx_error);
679         csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error);
680         csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M,
681                                    rx_error);
682         csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M,
683                                     rx_status);
684         csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M,
685                                         rx_status);
686
687         return csum_bits;
688 }
689
690 /**
691  * idpf_rx_singleq_flex_csum - Indicate in skb if hw indicated a good cksum
692  * @rx_desc: the receive descriptor
693  *
694  * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
695  * descriptor writeback format.
696  *
697  * Return: parsed checksum status.
698  **/
699 static struct idpf_rx_csum_decoded
700 idpf_rx_singleq_flex_csum(const union virtchnl2_rx_desc *rx_desc)
701 {
702         struct idpf_rx_csum_decoded csum_bits = { };
703         u16 rx_status0, rx_status1;
704
705         rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0);
706         rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1);
707
708         csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M,
709                                   rx_status0);
710         csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M,
711                                    rx_status0);
712         csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M,
713                                   rx_status0);
714         csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M,
715                                     rx_status0);
716         csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M,
717                                     rx_status0);
718         csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M,
719                                         rx_status0);
720         csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M,
721                                   rx_status1);
722
723         return csum_bits;
724 }
725
726 /**
727  * idpf_rx_singleq_base_hash - set the hash value in the skb
728  * @rx_q: Rx completion queue
729  * @skb: skb currently being received and modified
730  * @rx_desc: specific descriptor
731  * @decoded: Decoded Rx packet type related fields
732  *
733  * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
734  * descriptor writeback format.
735  **/
736 static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q,
737                                       struct sk_buff *skb,
738                                       const union virtchnl2_rx_desc *rx_desc,
739                                       struct libeth_rx_pt decoded)
740 {
741         u64 mask, qw1;
742
743         if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
744                 return;
745
746         mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
747         qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
748
749         if (FIELD_GET(mask, qw1) == mask) {
750                 u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss);
751
752                 libeth_rx_pt_set_hash(skb, hash, decoded);
753         }
754 }
755
756 /**
757  * idpf_rx_singleq_flex_hash - set the hash value in the skb
758  * @rx_q: Rx completion queue
759  * @skb: skb currently being received and modified
760  * @rx_desc: specific descriptor
761  * @decoded: Decoded Rx packet type related fields
762  *
763  * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
764  * descriptor writeback format.
765  **/
766 static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q,
767                                       struct sk_buff *skb,
768                                       const union virtchnl2_rx_desc *rx_desc,
769                                       struct libeth_rx_pt decoded)
770 {
771         if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
772                 return;
773
774         if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
775                       le16_to_cpu(rx_desc->flex_nic_wb.status_error0))) {
776                 u32 hash = le32_to_cpu(rx_desc->flex_nic_wb.rss_hash);
777
778                 libeth_rx_pt_set_hash(skb, hash, decoded);
779         }
780 }
781
782 /**
783  * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx
784  * descriptor
785  * @rx_q: Rx ring being processed
786  * @skb: pointer to current skb being populated
787  * @rx_desc: descriptor for skb
788  * @ptype: packet type
789  *
790  * This function checks the ring, descriptor, and packet information in
791  * order to populate the hash, checksum, VLAN, protocol, and
792  * other fields within the skb.
793  */
794 static void
795 idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
796                                    struct sk_buff *skb,
797                                    const union virtchnl2_rx_desc *rx_desc,
798                                    u16 ptype)
799 {
800         struct libeth_rx_pt decoded = rx_q->rx_ptype_lkup[ptype];
801         struct idpf_rx_csum_decoded csum_bits;
802
803         /* modifies the skb - consumes the enet header */
804         skb->protocol = eth_type_trans(skb, rx_q->netdev);
805
806         /* Check if we're using base mode descriptor IDs */
807         if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
808                 idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, decoded);
809                 csum_bits = idpf_rx_singleq_base_csum(rx_desc);
810         } else {
811                 idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, decoded);
812                 csum_bits = idpf_rx_singleq_flex_csum(rx_desc);
813         }
814
815         idpf_rx_singleq_csum(rx_q, skb, csum_bits, decoded);
816         skb_record_rx_queue(skb, rx_q->idx);
817 }
818
819 /**
820  * idpf_rx_buf_hw_update - Update next_to_use and store the new tail value
821  * @rxq: queue to bump
822  * @val: new tail index (also the new next_to_use)
823  */
824 static void idpf_rx_buf_hw_update(struct idpf_rx_queue *rxq, u32 val)
825 {
826         rxq->next_to_use = val;
827
828         if (unlikely(!rxq->tail))
829                 return;
830
831         /* writel has an implicit memory barrier */
832         writel(val, rxq->tail);
833 }
834
835 /**
836  * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers
837  * @rx_q: queue for which the hw buffers are allocated
838  * @cleaned_count: number of buffers to replace
839  *
840  * Returns false if all allocations were successful, true if any fail
841  */
842 bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q,
843                                       u16 cleaned_count)
844 {
845         struct virtchnl2_singleq_rx_buf_desc *desc;
846         const struct libeth_fq_fp fq = {
847                 .pp             = rx_q->pp,
848                 .fqes           = rx_q->rx_buf,
849                 .truesize       = rx_q->truesize,
850                 .count          = rx_q->desc_count,
851         };
852         u16 nta = rx_q->next_to_alloc;
853
854         if (!cleaned_count)
855                 return false;
856
857         desc = &rx_q->single_buf[nta];
858
859         do {
860                 dma_addr_t addr;
861
862                 addr = libeth_rx_alloc(&fq, nta);
863                 if (addr == DMA_MAPPING_ERROR)
864                         break;
865
866                 /* Refresh the desc even if buffer_addrs didn't change
867                  * because each write-back erases this info.
868                  */
869                 desc->pkt_addr = cpu_to_le64(addr);
870                 desc->hdr_addr = 0;
871                 desc++;
872
873                 nta++;
874                 if (unlikely(nta == rx_q->desc_count)) {
875                         desc = &rx_q->single_buf[0];
876                         nta = 0;
877                 }
878
879                 cleaned_count--;
880         } while (cleaned_count);
881
882         if (rx_q->next_to_alloc != nta) {
883                 idpf_rx_buf_hw_update(rx_q, nta);
884                 rx_q->next_to_alloc = nta;
885         }
886
887         return !!cleaned_count;
888 }
889
890 /**
891  * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
892  * @rx_desc: the descriptor to process
893  * @fields: storage for extracted values
894  *
895  * Decode the Rx descriptor and extract relevant information including the
896  * size and Rx packet type.
897  *
898  * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
899  * descriptor writeback format.
900  */
901 static void
902 idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc,
903                                     struct idpf_rx_extracted *fields)
904 {
905         u64 qword;
906
907         qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len);
908
909         fields->size = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword);
910         fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword);
911 }
912
913 /**
914  * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
915  * @rx_desc: the descriptor to process
916  * @fields: storage for extracted values
917  *
918  * Decode the Rx descriptor and extract relevant information including the
919  * size and Rx packet type.
920  *
921  * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
922  * descriptor writeback format.
923  */
924 static void
925 idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc,
926                                     struct idpf_rx_extracted *fields)
927 {
928         fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
929                                  le16_to_cpu(rx_desc->flex_nic_wb.pkt_len));
930         fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M,
931                                      le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0));
932 }
933
934 /**
935  * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor
936  * @rx_q: Rx descriptor queue
937  * @rx_desc: the descriptor to process
938  * @fields: storage for extracted values
939  *
940  */
941 static void
942 idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q,
943                                const union virtchnl2_rx_desc *rx_desc,
944                                struct idpf_rx_extracted *fields)
945 {
946         if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
947                 idpf_rx_singleq_extract_base_fields(rx_desc, fields);
948         else
949                 idpf_rx_singleq_extract_flex_fields(rx_desc, fields);
950 }
951
952 /**
953  * idpf_rx_singleq_clean - Reclaim resources after receive completes
954  * @rx_q: rx queue to clean
955  * @budget: Total limit on number of packets to process
956  *
957  * Returns the number of packets cleaned, or the full budget on buffer allocation failure
958  */
959 static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
960 {
961         unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
962         struct sk_buff *skb = rx_q->skb;
963         u16 ntc = rx_q->next_to_clean;
964         u16 cleaned_count = 0;
965         bool failure = false;
966
967         /* Process Rx packets bounded by budget */
968         while (likely(total_rx_pkts < (unsigned int)budget)) {
969                 struct idpf_rx_extracted fields = { };
970                 union virtchnl2_rx_desc *rx_desc;
971                 struct idpf_rx_buf *rx_buf;
972
973                 /* get the Rx desc from Rx queue based on 'next_to_clean' */
974                 rx_desc = &rx_q->rx[ntc];
975
976                 /* status_error_ptype_len will always be zero for unused
977                  * descriptors because it's cleared in cleanup and overlaps
978                  * with hdr_addr, which is always zero because packet split
979                  * isn't used. If the hardware wrote DD, then the length will
980                  * be non-zero.
981                  */
982 #define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M
983                 if (!idpf_rx_singleq_test_staterr(rx_desc,
984                                                   IDPF_RXD_DD))
985                         break;
986
987                 /* This memory barrier is needed to keep us from reading
988                  * any other fields out of the rx_desc
989                  */
990                 dma_rmb();
991
992                 idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);
993
994                 rx_buf = &rx_q->rx_buf[ntc];
995                 if (!libeth_rx_sync_for_cpu(rx_buf, fields.size))
996                         goto skip_data;
997
998                 if (skb)
999                         idpf_rx_add_frag(rx_buf, skb, fields.size);
1000                 else
1001                         skb = idpf_rx_build_skb(rx_buf, fields.size);
1002
1003                 /* exit if we failed to retrieve a buffer */
1004                 if (!skb)
1005                         break;
1006
1007 skip_data:
1008                 rx_buf->page = NULL;
1009
1010                 IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
1011                 cleaned_count++;
1012
1013                 /* skip if it is non EOP desc */
1014                 if (idpf_rx_singleq_is_non_eop(rx_desc) || unlikely(!skb))
1015                         continue;
1016
1017 #define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
1018                                   VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M)
1019                 if (unlikely(idpf_rx_singleq_test_staterr(rx_desc,
1020                                                           IDPF_RXD_ERR_S))) {
1021                         dev_kfree_skb_any(skb);
1022                         skb = NULL;
1023                         continue;
1024                 }
1025
1026                 /* pad skb if needed (to make valid ethernet frame) */
1027                 if (eth_skb_pad(skb)) {
1028                         skb = NULL;
1029                         continue;
1030                 }
1031
1032                 /* probably a little skewed due to removing CRC */
1033                 total_rx_bytes += skb->len;
1034
1035                 /* protocol */
1036                 idpf_rx_singleq_process_skb_fields(rx_q, skb,
1037                                                    rx_desc, fields.rx_ptype);
1038
1039                 /* send completed skb up the stack */
1040                 napi_gro_receive(rx_q->pp->p.napi, skb);
1041                 skb = NULL;
1042
1043                 /* update budget accounting */
1044                 total_rx_pkts++;
1045         }
1046
1047         rx_q->skb = skb;
1048
1049         rx_q->next_to_clean = ntc;
1050
1051         page_pool_nid_changed(rx_q->pp, numa_mem_id());
1052         if (cleaned_count)
1053                 failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);
1054
1055         u64_stats_update_begin(&rx_q->stats_sync);
1056         u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts);
1057         u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes);
1058         u64_stats_update_end(&rx_q->stats_sync);
1059
1060         /* guarantee a trip back through this routine if there was a failure */
1061         return failure ? budget : (int)total_rx_pkts;
1062 }
1063
1064 /**
1065  * idpf_rx_singleq_clean_all - Clean all Rx queues
1066  * @q_vec: queue vector
1067  * @budget: Used to determine if we are in netpoll
1068  * @cleaned: returns number of packets cleaned
1069  *
1070  * Returns false if clean is not complete else returns true
1071  */
1072 static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
1073                                       int *cleaned)
1074 {
1075         u16 num_rxq = q_vec->num_rxq;
1076         bool clean_complete = true;
1077         int budget_per_q, i;
1078
1079         /* We attempt to distribute budget to each Rx queue fairly, but don't
1080          * allow the budget to go below 1 because that would exit polling early.
1081          */
1082         budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
1083         for (i = 0; i < num_rxq; i++) {
1084                 struct idpf_rx_queue *rxq = q_vec->rx[i];
1085                 int pkts_cleaned_per_q;
1086
1087                 pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q);
1088
1089                 /* if we clean as many as budgeted, we must not be done */
1090                 if (pkts_cleaned_per_q >= budget_per_q)
1091                         clean_complete = false;
1092                 *cleaned += pkts_cleaned_per_q;
1093         }
1094
1095         return clean_complete;
1096 }
1097
1098 /**
1099  * idpf_vport_singleq_napi_poll - NAPI handler
1100  * @napi: struct from which you get q_vector
1101  * @budget: budget provided by stack
1102  */
1103 int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget)
1104 {
1105         struct idpf_q_vector *q_vector =
1106                                 container_of(napi, struct idpf_q_vector, napi);
1107         bool clean_complete;
1108         int work_done = 0;
1109
1110         /* Handle case where we are called by netpoll with a budget of 0 */
1111         if (budget <= 0) {
1112                 idpf_tx_singleq_clean_all(q_vector, budget, &work_done);
1113
1114                 return budget;
1115         }
1116
1117         clean_complete = idpf_rx_singleq_clean_all(q_vector, budget,
1118                                                    &work_done);
1119         clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget,
1120                                                     &work_done);
1121
1122         /* If work not completed, return budget and polling will return */
1123         if (!clean_complete) {
1124                 idpf_vport_intr_set_wb_on_itr(q_vector);
1125                 return budget;
1126         }
1127
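            /* cap work_done below budget so napi_complete_done() is never
             * called with the full budget
             */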
1128         work_done = min_t(int, work_done, budget - 1);
1129
1130         /* Exit the polling mode, but don't re-enable interrupts if stack might
1131          * poll us due to busy-polling
1132          */
1133         if (likely(napi_complete_done(napi, work_done)))
1134                 idpf_vport_intr_update_itr_ena_irq(q_vector);
1135         else
1136                 idpf_vport_intr_set_wb_on_itr(q_vector);
1137
1138         return work_done;
1139 }