2 * Copyright (C) 2015 Cavium, Inc.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License
6 * as published by the Free Software Foundation.
10 #include <linux/netdevice.h>
12 #include <linux/etherdevice.h>
13 #include <linux/iommu.h>
20 #include "nicvf_queues.h"
22 static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
24 static void nicvf_get_page(struct nicvf *nic)
26 if (!nic->rb_pageref || !nic->rb_page)
29 page_ref_add(nic->rb_page, nic->rb_pageref);
33 /* Poll a register for a specific value */
34 static int nicvf_poll_reg(struct nicvf *nic, int qidx,
35 u64 reg, int bit_pos, int bits, int val)
41 bit_mask = (1ULL << bits) - 1;
42 bit_mask = (bit_mask << bit_pos);
45 reg_val = nicvf_queue_reg_read(nic, reg, qidx);
46 if (((reg_val & bit_mask) >> bit_pos) == val)
48 usleep_range(1000, 2000);
51 netdev_err(nic->netdev, "Poll on reg 0x%llx failed\n", reg);
55 /* Allocate memory for a queue's descriptors */
56 static int nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem,
57 int q_len, int desc_size, int align_bytes)
60 dmem->size = (desc_size * q_len) + align_bytes;
61 /* Save address, need it while freeing */
62 dmem->unalign_base = dma_zalloc_coherent(&nic->pdev->dev, dmem->size,
63 &dmem->dma, GFP_KERNEL);
64 if (!dmem->unalign_base)
67 /* Align memory address for 'align_bytes' */
68 dmem->phys_base = NICVF_ALIGNED_ADDR((u64)dmem->dma, align_bytes);
69 dmem->base = dmem->unalign_base + (dmem->phys_base - dmem->dma);
73 /* Free queue's descriptor memory */
74 static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
79 dma_free_coherent(&nic->pdev->dev, dmem->size,
80 dmem->unalign_base, dmem->dma);
81 dmem->unalign_base = NULL;
85 #define XDP_PAGE_REFCNT_REFILL 256
87 /* Allocate a new page or recycle one if possible
89 * We cannot optimize dma mapping here, since
90 * 1. It's only one RBDR ring for 8 Rx queues.
91 * 2. CQE_RX gives address of the buffer where pkt has been DMA'ed
92 * and not idx into RBDR ring, so can't refer to saved info.
93 * 3. There are multiple receive buffers per page
95 static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic,
96 struct rbdr *rbdr, gfp_t gfp)
99 struct page *page = NULL;
100 struct pgcache *pgcache, *next;
102 /* Check if page is already allocated */
103 pgcache = &rbdr->pgcache[rbdr->pgidx];
104 page = pgcache->page;
105 /* Check if page can be recycled */
107 ref_count = page_ref_count(page);
108 /* Check if this page has been used once i.e 'put_page'
109 * called after packet transmission i.e internal ref_count
110 * and page's ref_count are equal i.e page can be recycled.
112 if (rbdr->is_xdp && (ref_count == pgcache->ref_count))
113 pgcache->ref_count--;
117 /* In non-XDP mode, page's ref_count needs to be '1' for it
120 if (!rbdr->is_xdp && (ref_count != 1))
125 page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, 0);
129 this_cpu_inc(nic->pnicvf->drv_stats->page_alloc);
131 /* Check for space */
132 if (rbdr->pgalloc >= rbdr->pgcnt) {
133 /* Page can still be used */
138 /* Save the page in page cache */
139 pgcache->page = page;
140 pgcache->dma_addr = 0;
141 pgcache->ref_count = 0;
145 /* Take additional page references for recycling */
147 /* Since there is single RBDR (i.e single core doing
148 * page recycling) per 8 Rx queues, in XDP mode adjusting
149 * page references atomically is the biggest bottleneck, so
150 * take bunch of references at a time.
152 * So here, below reference counts differ by '1'.
154 if (!pgcache->ref_count) {
155 pgcache->ref_count = XDP_PAGE_REFCNT_REFILL;
156 page_ref_add(page, XDP_PAGE_REFCNT_REFILL);
159 /* In non-XDP case, single 64K page is divided across multiple
160 * receive buffers, so cost of recycling is less anyway.
161 * So we can do with just one extra reference.
163 page_ref_add(page, 1);
167 rbdr->pgidx &= (rbdr->pgcnt - 1);
169 /* Prefetch refcount of next page in page cache */
170 next = &rbdr->pgcache[rbdr->pgidx];
173 prefetch(&page->_refcount);
178 /* Allocate buffer for packet reception */
179 static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
180 gfp_t gfp, u32 buf_len, u64 *rbuf)
182 struct pgcache *pgcache = NULL;
184 /* Check if request can be accommodated in previously allocated page.
185 * But in XDP mode only one buffer per page is permitted.
187 if (!rbdr->is_xdp && nic->rb_page &&
188 ((nic->rb_page_offset + buf_len) <= PAGE_SIZE)) {
196 /* Get new page, either recycled or new one */
197 pgcache = nicvf_alloc_page(nic, rbdr, gfp);
198 if (!pgcache && !nic->rb_page) {
199 this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures);
203 nic->rb_page_offset = 0;
205 /* Reserve space for header modifications by BPF program */
207 buf_len += XDP_PACKET_HEADROOM;
209 /* Check if it's recycled */
211 nic->rb_page = pgcache->page;
213 if (rbdr->is_xdp && pgcache && pgcache->dma_addr) {
214 *rbuf = pgcache->dma_addr;
216 /* HW will ensure data coherency, CPU sync not required */
217 *rbuf = (u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page,
218 nic->rb_page_offset, buf_len,
220 DMA_ATTR_SKIP_CPU_SYNC);
221 if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) {
222 if (!nic->rb_page_offset)
223 __free_pages(nic->rb_page, 0);
228 pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM;
229 nic->rb_page_offset += buf_len;
235 /* Build skb around receive buffer */
236 static struct sk_buff *nicvf_rb_ptr_to_skb(struct nicvf *nic,
242 data = phys_to_virt(rb_ptr);
244 /* Now build an skb to give to stack */
245 skb = build_skb(data, RCV_FRAG_LEN);
247 put_page(virt_to_page(data));
255 /* Allocate RBDR ring and populate receive buffers */
256 static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
257 int ring_len, int buf_size)
261 struct rbdr_entry_t *desc;
264 err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len,
265 sizeof(struct rbdr_entry_t),
266 NICVF_RCV_BUF_ALIGN_BYTES);
270 rbdr->desc = rbdr->dmem.base;
271 /* Buffer size has to be in multiples of 128 bytes */
272 rbdr->dma_size = buf_size;
274 rbdr->thresh = RBDR_THRESH;
278 /* Initialize page recycling stuff.
280 * Can't use single buffer per page especially with 64K pages.
281 * On embedded platforms i.e 81xx/83xx available memory itself
282 * is low and minimum ring size of RBDR is 8K, that takes away
285 * But for XDP it has to be a single buffer per page.
287 if (!nic->pnicvf->xdp_prog) {
288 rbdr->pgcnt = ring_len / (PAGE_SIZE / buf_size);
289 rbdr->is_xdp = false;
291 rbdr->pgcnt = ring_len;
294 rbdr->pgcnt = roundup_pow_of_two(rbdr->pgcnt);
295 rbdr->pgcache = kcalloc(rbdr->pgcnt, sizeof(*rbdr->pgcache),
303 for (idx = 0; idx < ring_len; idx++) {
304 err = nicvf_alloc_rcv_buffer(nic, rbdr, GFP_KERNEL,
305 RCV_FRAG_LEN, &rbuf);
307 /* To free already allocated and mapped ones */
308 rbdr->tail = idx - 1;
312 desc = GET_RBDR_DESC(rbdr, idx);
313 desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1);
321 /* Free RBDR ring and its receive buffers */
322 static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
325 u64 buf_addr, phys_addr;
326 struct pgcache *pgcache;
327 struct rbdr_entry_t *desc;
332 rbdr->enable = false;
333 if (!rbdr->dmem.base)
339 /* Release page references */
340 while (head != tail) {
341 desc = GET_RBDR_DESC(rbdr, head);
342 buf_addr = desc->buf_addr;
343 phys_addr = nicvf_iova_to_phys(nic, buf_addr);
344 dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
345 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
347 put_page(virt_to_page(phys_to_virt(phys_addr)));
349 head &= (rbdr->dmem.q_len - 1);
351 /* Release buffer of tail desc */
352 desc = GET_RBDR_DESC(rbdr, tail);
353 buf_addr = desc->buf_addr;
354 phys_addr = nicvf_iova_to_phys(nic, buf_addr);
355 dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
356 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
358 put_page(virt_to_page(phys_to_virt(phys_addr)));
360 /* Sync page cache info */
363 /* Release additional page references held for recycling */
365 while (head < rbdr->pgcnt) {
366 pgcache = &rbdr->pgcache[head];
367 if (pgcache->page && page_ref_count(pgcache->page) != 0) {
369 put_page(pgcache->page);
372 page_ref_sub(pgcache->page, pgcache->ref_count - 1);
373 put_page(pgcache->page);
379 nicvf_free_q_desc_mem(nic, &rbdr->dmem);
382 /* Refill receive buffer descriptors with new buffers.
384 static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp)
386 struct queue_set *qs = nic->qs;
387 int rbdr_idx = qs->rbdr_cnt;
391 struct rbdr_entry_t *desc;
399 rbdr = &qs->rbdr[rbdr_idx];
400 /* Check if it's enabled */
404 /* Get no of desc's to be refilled */
405 qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx);
407 /* Doorbell can be rung with a max of ring size minus 1 */
408 if (qcount >= (qs->rbdr_len - 1))
411 refill_rb_cnt = qs->rbdr_len - qcount - 1;
413 /* Sync page cache info */
416 /* Start filling descs from tail */
417 tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3;
418 while (refill_rb_cnt) {
420 tail &= (rbdr->dmem.q_len - 1);
422 if (nicvf_alloc_rcv_buffer(nic, rbdr, gfp, RCV_FRAG_LEN, &rbuf))
425 desc = GET_RBDR_DESC(rbdr, tail);
426 desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1);
433 /* make sure all memory stores are done before ringing doorbell */
436 /* Check if buffer allocation failed */
438 nic->rb_alloc_fail = true;
440 nic->rb_alloc_fail = false;
443 nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
446 /* Re-enable RBDR interrupts only if buffer allocation is success */
447 if (!nic->rb_alloc_fail && rbdr->enable &&
448 netif_running(nic->pnicvf->netdev))
449 nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx);
455 /* Alloc rcv buffers in non-atomic mode for better success */
456 void nicvf_rbdr_work(struct work_struct *work)
458 struct nicvf *nic = container_of(work, struct nicvf, rbdr_work.work);
460 nicvf_refill_rbdr(nic, GFP_KERNEL);
461 if (nic->rb_alloc_fail)
462 schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10));
464 nic->rb_work_scheduled = false;
467 /* In Softirq context, alloc rcv buffers in atomic mode */
468 void nicvf_rbdr_task(unsigned long data)
470 struct nicvf *nic = (struct nicvf *)data;
472 nicvf_refill_rbdr(nic, GFP_ATOMIC);
473 if (nic->rb_alloc_fail) {
474 nic->rb_work_scheduled = true;
475 schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10));
479 /* Initialize completion queue */
480 static int nicvf_init_cmp_queue(struct nicvf *nic,
481 struct cmp_queue *cq, int q_len)
485 err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE,
486 NICVF_CQ_BASE_ALIGN_BYTES);
490 cq->desc = cq->dmem.base;
491 cq->thresh = pass1_silicon(nic->pdev) ? 0 : CMP_QUEUE_CQE_THRESH;
492 nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1;
497 static void nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq)
504 nicvf_free_q_desc_mem(nic, &cq->dmem);
507 /* Initialize transmit queue */
508 static int nicvf_init_snd_queue(struct nicvf *nic,
509 struct snd_queue *sq, int q_len, int qidx)
513 err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE,
514 NICVF_SQ_BASE_ALIGN_BYTES);
518 sq->desc = sq->dmem.base;
519 sq->skbuff = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
525 sq->thresh = SND_QUEUE_THRESH;
527 /* Check if this SQ is a XDP TX queue */
529 qidx += ((nic->sqs_id + 1) * MAX_SND_QUEUES_PER_QS);
530 if (qidx < nic->pnicvf->xdp_tx_queues) {
531 /* Alloc memory to save page pointers for XDP_TX */
532 sq->xdp_page = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
535 sq->xdp_desc_cnt = 0;
536 sq->xdp_free_cnt = q_len - 1;
540 sq->xdp_desc_cnt = 0;
541 sq->xdp_free_cnt = 0;
544 atomic_set(&sq->free_cnt, q_len - 1);
546 /* Preallocate memory for TSO segment's header */
547 sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev,
548 q_len * TSO_HEADER_SIZE,
558 void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
559 int hdr_sqe, u8 subdesc_cnt)
562 struct sq_gather_subdesc *gather;
564 /* Unmap DMA mapped skb data buffers */
565 for (idx = 0; idx < subdesc_cnt; idx++) {
567 hdr_sqe &= (sq->dmem.q_len - 1);
568 gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe);
569 /* HW will ensure data coherency, CPU sync not required */
570 dma_unmap_page_attrs(&nic->pdev->dev, gather->addr,
571 gather->size, DMA_TO_DEVICE,
572 DMA_ATTR_SKIP_CPU_SYNC);
576 static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
580 struct sq_hdr_subdesc *hdr;
581 struct sq_hdr_subdesc *tso_sqe;
589 dma_free_coherent(&nic->pdev->dev,
590 sq->dmem.q_len * TSO_HEADER_SIZE,
591 sq->tso_hdrs, sq->tso_hdrs_phys);
595 /* Free pending skbs in the queue */
597 while (sq->head != sq->tail) {
598 skb = (struct sk_buff *)sq->skbuff[sq->head];
599 if (!skb || !sq->xdp_page)
602 page = (struct page *)sq->xdp_page[sq->head];
608 hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
609 /* Check for dummy descriptor used for HW TSO offload on 88xx */
610 if (hdr->dont_send) {
611 /* Get actual TSO descriptors and unmap them */
613 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
614 nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
615 tso_sqe->subdesc_cnt);
617 nicvf_unmap_sndq_buffers(nic, sq, sq->head,
621 dev_kfree_skb_any(skb);
624 sq->head &= (sq->dmem.q_len - 1);
628 nicvf_free_q_desc_mem(nic, &sq->dmem);
631 static void nicvf_reclaim_snd_queue(struct nicvf *nic,
632 struct queue_set *qs, int qidx)
634 /* Disable send queue */
635 nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0);
636 /* Check if SQ is stopped */
637 if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01))
639 /* Reset send queue */
640 nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
643 static void nicvf_reclaim_rcv_queue(struct nicvf *nic,
644 struct queue_set *qs, int qidx)
646 union nic_mbx mbx = {};
648 /* Make sure all packets in the pipeline are written back into mem */
649 mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC;
650 nicvf_send_msg_to_pf(nic, &mbx);
653 static void nicvf_reclaim_cmp_queue(struct nicvf *nic,
654 struct queue_set *qs, int qidx)
656 /* Disable timer threshold (doesn't get reset upon CQ reset) */
657 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0);
658 /* Disable completion queue */
659 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0);
660 /* Reset completion queue */
661 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
664 static void nicvf_reclaim_rbdr(struct nicvf *nic,
665 struct rbdr *rbdr, int qidx)
670 /* Save head and tail pointers for freeing up buffers */
671 rbdr->head = nicvf_queue_reg_read(nic,
672 NIC_QSET_RBDR_0_1_HEAD,
674 rbdr->tail = nicvf_queue_reg_read(nic,
675 NIC_QSET_RBDR_0_1_TAIL,
678 /* If RBDR FIFO is in 'FAIL' state then do a reset first
681 fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx);
682 if (((fifo_state >> 62) & 0x03) == 0x3)
683 nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
684 qidx, NICVF_RBDR_RESET);
687 nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0);
688 if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
691 tmp = nicvf_queue_reg_read(nic,
692 NIC_QSET_RBDR_0_1_PREFETCH_STATUS,
694 if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF))
696 usleep_range(1000, 2000);
699 netdev_err(nic->netdev,
700 "Failed polling on prefetch status\n");
704 nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
705 qidx, NICVF_RBDR_RESET);
707 if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02))
709 nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00);
710 if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
714 void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features)
719 rq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_RQ_GEN_CFG, 0);
721 /* Enable first VLAN stripping */
722 if (features & NETIF_F_HW_VLAN_CTAG_RX)
723 rq_cfg |= (1ULL << 25);
725 rq_cfg &= ~(1ULL << 25);
726 nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
728 /* Configure Secondary Qsets, if any */
729 for (sqs = 0; sqs < nic->sqs_count; sqs++)
730 if (nic->snicvf[sqs])
731 nicvf_queue_reg_write(nic->snicvf[sqs],
732 NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
735 static void nicvf_reset_rcv_queue_stats(struct nicvf *nic)
737 union nic_mbx mbx = {};
739 /* Reset all RQ/SQ and VF stats */
740 mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER;
741 mbx.reset_stat.rx_stat_mask = 0x3FFF;
742 mbx.reset_stat.tx_stat_mask = 0x1F;
743 mbx.reset_stat.rq_stat_mask = 0xFFFF;
744 mbx.reset_stat.sq_stat_mask = 0xFFFF;
745 nicvf_send_msg_to_pf(nic, &mbx);
748 /* Configures receive queue */
749 static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
750 int qidx, bool enable)
752 union nic_mbx mbx = {};
753 struct rcv_queue *rq;
754 struct rq_cfg rq_cfg;
759 /* Disable receive queue */
760 nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0);
763 nicvf_reclaim_rcv_queue(nic, qs, qidx);
764 xdp_rxq_info_unreg(&rq->xdp_rxq);
768 rq->cq_qs = qs->vnic_id;
770 rq->start_rbdr_qs = qs->vnic_id;
771 rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1;
772 rq->cont_rbdr_qs = qs->vnic_id;
773 rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1;
774 /* all writes of RBDR data to be loaded into L2 Cache as well*/
777 /* Driver has no proper error path for failed XDP RX-queue info reg */
778 WARN_ON(xdp_rxq_info_reg(&rq->xdp_rxq, nic->netdev, qidx) < 0);
780 /* Send a mailbox msg to PF to config RQ */
781 mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
782 mbx.rq.qs_num = qs->vnic_id;
783 mbx.rq.rq_num = qidx;
784 mbx.rq.cfg = (rq->caching << 26) | (rq->cq_qs << 19) |
785 (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) |
786 (rq->cont_qs_rbdr_idx << 8) |
787 (rq->start_rbdr_qs << 1) | (rq->start_qs_rbdr_idx);
788 nicvf_send_msg_to_pf(nic, &mbx);
790 mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
791 mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
792 (RQ_PASS_RBDR_LVL << 16) | (RQ_PASS_CQ_LVL << 8) |
794 nicvf_send_msg_to_pf(nic, &mbx);
797 * Enable CQ drop to reserve sufficient CQEs for all tx packets
799 mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
800 mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
801 (RQ_PASS_RBDR_LVL << 40) | (RQ_DROP_RBDR_LVL << 32) |
802 (RQ_PASS_CQ_LVL << 16) | (RQ_DROP_CQ_LVL << 8);
803 nicvf_send_msg_to_pf(nic, &mbx);
805 if (!nic->sqs_mode && (qidx == 0)) {
806 /* Enable checking L3/L4 length and TCP/UDP checksums
807 * Also allow IPv6 pkts with zero UDP checksum.
809 nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0,
810 (BIT(24) | BIT(23) | BIT(21) | BIT(20)));
811 nicvf_config_vlan_stripping(nic, nic->netdev->features);
814 /* Enable Receive queue */
815 memset(&rq_cfg, 0, sizeof(struct rq_cfg));
818 nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg);
821 /* Configures completion queue */
822 void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
823 int qidx, bool enable)
825 struct cmp_queue *cq;
826 struct cq_cfg cq_cfg;
832 nicvf_reclaim_cmp_queue(nic, qs, qidx);
836 /* Reset completion queue */
837 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
842 spin_lock_init(&cq->lock);
843 /* Set completion queue base address */
844 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE,
845 qidx, (u64)(cq->dmem.phys_base));
847 /* Enable Completion queue */
848 memset(&cq_cfg, 0, sizeof(struct cq_cfg));
852 cq_cfg.qsize = ilog2(qs->cq_len >> 10);
854 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, *(u64 *)&cq_cfg);
856 /* Set threshold value for interrupt generation */
857 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
858 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2,
859 qidx, CMP_QUEUE_TIMER_THRESH);
862 /* Configures transmit queue */
863 static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs,
864 int qidx, bool enable)
866 union nic_mbx mbx = {};
867 struct snd_queue *sq;
868 struct sq_cfg sq_cfg;
874 nicvf_reclaim_snd_queue(nic, qs, qidx);
878 /* Reset send queue */
879 nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
881 sq->cq_qs = qs->vnic_id;
884 /* Send a mailbox msg to PF to config SQ */
885 mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG;
886 mbx.sq.qs_num = qs->vnic_id;
887 mbx.sq.sq_num = qidx;
888 mbx.sq.sqs_mode = nic->sqs_mode;
889 mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx;
890 nicvf_send_msg_to_pf(nic, &mbx);
892 /* Set queue base address */
893 nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE,
894 qidx, (u64)(sq->dmem.phys_base));
896 /* Enable send queue & set queue size */
897 memset(&sq_cfg, 0, sizeof(struct sq_cfg));
901 sq_cfg.qsize = ilog2(qs->sq_len >> 10);
902 sq_cfg.tstmp_bgx_intf = 0;
903 /* CQ's level at which HW will stop processing SQEs to avoid
904 * transmitting a pkt with no space in CQ to post CQE_TX.
906 sq_cfg.cq_limit = (CMP_QUEUE_PIPELINE_RSVD * 256) / qs->cq_len;
907 nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(u64 *)&sq_cfg);
909 /* Set threshold value for interrupt generation */
910 nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh);
912 /* Set queue:cpu affinity for better load distribution */
913 if (cpu_online(qidx)) {
914 cpumask_set_cpu(qidx, &sq->affinity_mask);
915 netif_set_xps_queue(nic->netdev,
916 &sq->affinity_mask, qidx);
920 /* Configures receive buffer descriptor ring */
921 static void nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs,
922 int qidx, bool enable)
925 struct rbdr_cfg rbdr_cfg;
927 rbdr = &qs->rbdr[qidx];
928 nicvf_reclaim_rbdr(nic, rbdr, qidx);
932 /* Set descriptor base address */
933 nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE,
934 qidx, (u64)(rbdr->dmem.phys_base));
936 /* Enable RBDR & set queue size */
937 /* Buffer size should be in multiples of 128 bytes */
938 memset(&rbdr_cfg, 0, sizeof(struct rbdr_cfg));
942 rbdr_cfg.qsize = RBDR_SIZE;
943 rbdr_cfg.avg_con = 0;
944 rbdr_cfg.lines = rbdr->dma_size / 128;
945 nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
946 qidx, *(u64 *)&rbdr_cfg);
949 nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
950 qidx, qs->rbdr_len - 1);
952 /* Set threshold value for interrupt generation */
953 nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH,
954 qidx, rbdr->thresh - 1);
957 /* Requests PF to assign and enable Qset */
958 void nicvf_qset_config(struct nicvf *nic, bool enable)
960 union nic_mbx mbx = {};
961 struct queue_set *qs = nic->qs;
962 struct qs_cfg *qs_cfg;
965 netdev_warn(nic->netdev,
966 "Qset is still not allocated, don't init queues\n");
971 qs->vnic_id = nic->vf_id;
973 /* Send a mailbox msg to PF to config Qset */
974 mbx.qs.msg = NIC_MBOX_MSG_QS_CFG;
975 mbx.qs.num = qs->vnic_id;
976 mbx.qs.sqs_count = nic->sqs_count;
979 qs_cfg = (struct qs_cfg *)&mbx.qs.cfg;
985 qs_cfg->vnic = qs->vnic_id;
986 /* Enable Tx timestamping capability */
988 qs_cfg->send_tstmp_ena = 1;
990 nicvf_send_msg_to_pf(nic, &mbx);
993 static void nicvf_free_resources(struct nicvf *nic)
996 struct queue_set *qs = nic->qs;
998 /* Free receive buffer descriptor ring */
999 for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1000 nicvf_free_rbdr(nic, &qs->rbdr[qidx]);
1002 /* Free completion queue */
1003 for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1004 nicvf_free_cmp_queue(nic, &qs->cq[qidx]);
1006 /* Free send queue */
1007 for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1008 nicvf_free_snd_queue(nic, &qs->sq[qidx]);
1011 static int nicvf_alloc_resources(struct nicvf *nic)
1014 struct queue_set *qs = nic->qs;
1016 /* Alloc receive buffer descriptor ring */
1017 for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
1018 if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len,
1023 /* Alloc send queue */
1024 for (qidx = 0; qidx < qs->sq_cnt; qidx++) {
1025 if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx))
1029 /* Alloc completion queue */
1030 for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
1031 if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len))
1037 nicvf_free_resources(nic);
1041 int nicvf_set_qset_resources(struct nicvf *nic)
1043 struct queue_set *qs;
1045 qs = devm_kzalloc(&nic->pdev->dev, sizeof(*qs), GFP_KERNEL);
1050 /* Set count of each queue */
1051 qs->rbdr_cnt = DEFAULT_RBDR_CNT;
1052 qs->rq_cnt = min_t(u8, MAX_RCV_QUEUES_PER_QS, num_online_cpus());
1053 qs->sq_cnt = min_t(u8, MAX_SND_QUEUES_PER_QS, num_online_cpus());
1054 qs->cq_cnt = max_t(u8, qs->rq_cnt, qs->sq_cnt);
1056 /* Set queue lengths */
1057 qs->rbdr_len = RCV_BUF_COUNT;
1058 qs->sq_len = SND_QUEUE_LEN;
1059 qs->cq_len = CMP_QUEUE_LEN;
1061 nic->rx_queues = qs->rq_cnt;
1062 nic->tx_queues = qs->sq_cnt;
1063 nic->xdp_tx_queues = 0;
1068 int nicvf_config_data_transfer(struct nicvf *nic, bool enable)
1070 bool disable = false;
1071 struct queue_set *qs = nic->qs;
1072 struct queue_set *pqs = nic->pnicvf->qs;
1078 /* Take primary VF's queue lengths.
1079 * This is needed to take queue lengths set from ethtool
1080 * into consideration.
1082 if (nic->sqs_mode && pqs) {
1083 qs->cq_len = pqs->cq_len;
1084 qs->sq_len = pqs->sq_len;
1088 if (nicvf_alloc_resources(nic))
1091 for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1092 nicvf_snd_queue_config(nic, qs, qidx, enable);
1093 for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1094 nicvf_cmp_queue_config(nic, qs, qidx, enable);
1095 for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1096 nicvf_rbdr_config(nic, qs, qidx, enable);
1097 for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1098 nicvf_rcv_queue_config(nic, qs, qidx, enable);
1100 for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1101 nicvf_rcv_queue_config(nic, qs, qidx, disable);
1102 for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
1103 nicvf_rbdr_config(nic, qs, qidx, disable);
1104 for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1105 nicvf_snd_queue_config(nic, qs, qidx, disable);
1106 for (qidx = 0; qidx < qs->cq_cnt; qidx++)
1107 nicvf_cmp_queue_config(nic, qs, qidx, disable);
1109 nicvf_free_resources(nic);
1112 /* Reset RXQ's stats.
1113 * SQ's stats will get reset automatically once SQ is reset.
1115 nicvf_reset_rcv_queue_stats(nic);
1120 /* Get a free desc from SQ
1121 * returns descriptor pointer & descriptor number
1123 static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
1129 atomic_sub(desc_cnt, &sq->free_cnt);
1131 sq->xdp_free_cnt -= desc_cnt;
1132 sq->tail += desc_cnt;
1133 sq->tail &= (sq->dmem.q_len - 1);
1138 /* Rollback to previous tail pointer when descriptors not used */
1139 static inline void nicvf_rollback_sq_desc(struct snd_queue *sq,
1140 int qentry, int desc_cnt)
1143 atomic_add(desc_cnt, &sq->free_cnt);
1146 /* Free descriptor back to SQ for future use */
1147 void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
1150 atomic_add(desc_cnt, &sq->free_cnt);
1152 sq->xdp_free_cnt += desc_cnt;
1153 sq->head += desc_cnt;
1154 sq->head &= (sq->dmem.q_len - 1);
1157 static inline int nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry)
1160 qentry &= (sq->dmem.q_len - 1);
1164 void nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx)
1168 sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
1169 sq_cfg |= NICVF_SQ_EN;
1170 nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
1171 /* Ring doorbell so that H/W restarts processing SQEs */
1172 nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0);
1175 void nicvf_sq_disable(struct nicvf *nic, int qidx)
1179 sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
1180 sq_cfg &= ~NICVF_SQ_EN;
1181 nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
1184 void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq,
1188 struct sk_buff *skb;
1189 struct nicvf *nic = netdev_priv(netdev);
1190 struct sq_hdr_subdesc *hdr;
1192 head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4;
1193 tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4;
1194 while (sq->head != head) {
1195 hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
1196 if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
1197 nicvf_put_sq_desc(sq, 1);
1200 skb = (struct sk_buff *)sq->skbuff[sq->head];
1202 dev_kfree_skb_any(skb);
1203 atomic64_add(1, (atomic64_t *)&netdev->stats.tx_packets);
1204 atomic64_add(hdr->tot_len,
1205 (atomic64_t *)&netdev->stats.tx_bytes);
1206 nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
1210 /* XDP Transmit APIs */
1211 void nicvf_xdp_sq_doorbell(struct nicvf *nic,
1212 struct snd_queue *sq, int sq_num)
1214 if (!sq->xdp_desc_cnt)
1217 /* make sure all memory stores are done before ringing doorbell */
1220 /* Inform HW to xmit all queued XDP descriptors */
1221 nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
1222 sq_num, sq->xdp_desc_cnt);
1223 sq->xdp_desc_cnt = 0;
1227 nicvf_xdp_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
1228 int subdesc_cnt, u64 data, int len)
1230 struct sq_hdr_subdesc *hdr;
1232 hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
1233 memset(hdr, 0, SND_QUEUE_DESC_SIZE);
1234 hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
1235 hdr->subdesc_cnt = subdesc_cnt;
1238 sq->xdp_page[qentry] = (u64)virt_to_page((void *)data);
1241 int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
1242 u64 bufaddr, u64 dma_addr, u16 len)
1244 int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
1247 if (subdesc_cnt > sq->xdp_free_cnt)
1250 qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
1252 nicvf_xdp_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, bufaddr, len);
1254 qentry = nicvf_get_nxt_sqentry(sq, qentry);
1255 nicvf_sq_add_gather_subdesc(sq, qentry, len, dma_addr);
1257 sq->xdp_desc_cnt += subdesc_cnt;
1262 /* Calculate no of SQ subdescriptors needed to transmit all
1263 * segments of this TSO packet.
1264 * Taken from 'Tilera network driver' with a minor modification.
1266 static int nicvf_tso_count_subdescs(struct sk_buff *skb)
1268 struct skb_shared_info *sh = skb_shinfo(skb);
1269 unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
1270 unsigned int data_len = skb->len - sh_len;
1271 unsigned int p_len = sh->gso_size;
1272 long f_id = -1; /* id of the current fragment */
1273 long f_size = skb_headlen(skb) - sh_len; /* current fragment size */
1274 long f_used = 0; /* bytes used from the current fragment */
1275 long n; /* size of the current piece of payload */
1279 for (segment = 0; segment < sh->gso_segs; segment++) {
1280 unsigned int p_used = 0;
1282 /* One edesc for header and for each piece of the payload. */
1283 for (num_edescs++; p_used < p_len; num_edescs++) {
1284 /* Advance as needed. */
1285 while (f_used >= f_size) {
1287 f_size = skb_frag_size(&sh->frags[f_id]);
1291 /* Use bytes from the current fragment. */
1293 if (n > f_size - f_used)
1294 n = f_size - f_used;
1299 /* The last segment may be less than gso_size. */
1301 if (data_len < p_len)
1305 /* '+ gso_segs' for SQ_HDR_SUBDESCs for each segment */
1306 return num_edescs + sh->gso_segs;
1309 #define POST_CQE_DESC_COUNT 2
1311 /* Get the number of SQ descriptors needed to xmit this skb */
1312 static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb)
1314 int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
1316 if (skb_shinfo(skb)->gso_size && !nic->hw_tso) {
1317 subdesc_cnt = nicvf_tso_count_subdescs(skb);
1321 /* Dummy descriptors to get TSO pkt completion notification */
1322 if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size)
1323 subdesc_cnt += POST_CQE_DESC_COUNT;
1325 if (skb_shinfo(skb)->nr_frags)
1326 subdesc_cnt += skb_shinfo(skb)->nr_frags;
1331 /* Add SQ HEADER subdescriptor.
1332 * First subdescriptor for every send descriptor.
 *
 * The slot at 'qentry' is zeroed and typed as SQ_DESC_TYPE_HEADER, then
 * filled with the subdescriptor count, the checksum-offload settings and,
 * when HW TSO is used for a GSO skb, the TSO parameters taken from 'skb'.
1335 nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry,
1336 int subdesc_cnt, struct sk_buff *skb, int len)
1339 struct sq_hdr_subdesc *hdr;
1346 ip.hdr = skb_network_header(skb);
1347 hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
1348 memset(hdr, 0, SND_QUEUE_DESC_SIZE);
1349 hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
1351 if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size) {
1352 /* post_cqe = 0, to avoid HW posting a CQE for every TSO
1353 * segment transmitted on 88xx.
1355 hdr->subdesc_cnt = subdesc_cnt - POST_CQE_DESC_COUNT;
1357 sq->skbuff[qentry] = (u64)skb;
1358 /* Enable notification via CQE after processing SQE */
1360 /* Number of subdescriptors following this */
1361 hdr->subdesc_cnt = subdesc_cnt;
1365 /* Offload checksum calculation to HW */
1366 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1367 if (ip.v4->version == 4)
1368 hdr->csum_l3 = 1; /* Enable IP csum calculation */
/* Tell HW where the L3 and L4 headers start within the packet */
1369 hdr->l3_offset = skb_network_offset(skb);
1370 hdr->l4_offset = skb_transport_offset(skb);
/* L4 protocol number; for IPv4 it is the header's 'protocol' field */
1372 proto = (ip.v4->version == 4) ? ip.v4->protocol :
/* L4 checksum mode written to the header: TCP, UDP or SCTP
 * (presumably selected by 'proto' -- TODO confirm)
 */
1377 hdr->csum_l4 = SEND_L4_CSUM_TCP;
1380 hdr->csum_l4 = SEND_L4_CSUM_UDP;
1383 hdr->csum_l4 = SEND_L4_CSUM_SCTP;
/* HW TSO: TSO starts right after the TCP header and each segment carries
 * at most gso_size bytes of payload.
 */
1388 if (nic->hw_tso && skb_shinfo(skb)->gso_size) {
1390 hdr->tso_start = skb_transport_offset(skb) + tcp_hdrlen(skb);
1391 hdr->tso_max_paysize = skb_shinfo(skb)->gso_size;
1392 /* For non-tunneled pkts, point this to L2 ethertype */
1393 hdr->inner_l3_offset = skb_network_offset(skb) - 2;
/* Count this packet in the per-CPU tx_tso driver statistic */
1394 this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
1397 /* Check if timestamp is requested */
1398 if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
1399 skb_tx_timestamp(skb);
1403 /* Tx timestamping not supported along with TSO, so ignore request */
1404 if (skb_shinfo(skb)->gso_size)
1407 /* HW supports only a single outstanding packet to timestamp */
1408 if (!atomic_add_unless(&nic->pnicvf->tx_ptp_skbs, 1, 1))
1411 /* Mark the SKB for later reference */
1412 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
1414 /* Finally enable timestamp generation
1415 * Since 'post_cqe' is also set, two CQEs will be posted
1416 * for this packet i.e. CQE_TYPE_SEND and CQE_TYPE_SEND_PTP.
1421 /* SQ GATHER subdescriptor
1422 * Must follow HDR descriptor
 *
 * Masks 'qentry' with (q_len - 1), zeroes the descriptor slot at that
 * index and fills it as a SQ_DESC_TYPE_GATHER subdescriptor with load type
 * NIC_SEND_LD_TYPE_E_LDD, using 'size' as the length and 'data' as the
 * buffer address.
1424 static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
1427 struct sq_gather_subdesc *gather;
1429 qentry &= (sq->dmem.q_len - 1);
1430 gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry);
1432 memset(gather, 0, SND_QUEUE_DESC_SIZE);
1433 gather->subdesc_type = SQ_DESC_TYPE_GATHER;
1434 gather->ld_type = NIC_SEND_LD_TYPE_E_LDD;
1435 gather->size = size;
1436 gather->addr = data;
1439 /* Add HDR + IMMEDIATE subdescriptors right after descriptors of a TSO
1440 * packet so that a CQE is posted as a notification for transmission of
 * that packet.
 *
 * 'skb' is recorded in sq->skbuff[] at 'qentry', and 'tso_sqe' is stored in
 * the header's 'rsvd2' field.
1443 static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry,
1444 int tso_sqe, struct sk_buff *skb)
1446 struct sq_imm_subdesc *imm;
1447 struct sq_hdr_subdesc *hdr;
1449 sq->skbuff[qentry] = (u64)skb;
1451 hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
1452 memset(hdr, 0, SND_QUEUE_DESC_SIZE);
1453 hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
1454 /* Enable notification via CQE after processing SQE */
1456 /* There is no packet to transmit here */
/* Only the IMMEDIATE subdescriptor follows this header */
1458 hdr->subdesc_cnt = POST_CQE_DESC_COUNT - 1;
1460 /* Actual TSO header SQE index, needed for cleanup */
1461 hdr->rsvd2 = tso_sqe;
/* The IMMEDIATE subdescriptor goes into the next SQ entry */
1463 qentry = nicvf_get_nxt_sqentry(sq, qentry);
1464 imm = (struct sq_imm_subdesc *)GET_SQ_DESC(sq, qentry);
1465 memset(imm, 0, SND_QUEUE_DESC_SIZE);
1466 imm->subdesc_type = SQ_DESC_TYPE_IMMEDIATE;
/* Account the skb on its netdev Tx queue and notify HW of new descriptors.
 * The Tx queue is looked up on nic->pnicvf->netdev from the skb's queue
 * mapping; the notification is a write to the NIC_QSET_SQ_0_7_DOOR register.
 */
1470 static inline void nicvf_sq_doorbell(struct nicvf *nic, struct sk_buff *skb,
1471 int sq_num, int desc_cnt)
1473 struct netdev_queue *txq;
1475 txq = netdev_get_tx_queue(nic->pnicvf->netdev,
1476 skb_get_queue_mapping(skb));
/* Report skb->len bytes as queued on this Tx queue */
1478 netdev_tx_sent_queue(txq, skb->len);
1480 /* make sure all memory stores are done before ringing doorbell */
1483 /* Inform HW to xmit all TSO segments */
1484 nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
1488 /* Segment a TSO packet into 'gso_size' segments and append
1489 * them to SQ for transfer
 *
 * Each segment gets a GATHER subdescriptor for its header, which is built
 * in sq->tso_hdrs, followed by GATHER subdescriptors for its payload
 * pieces. The segment's HEADER subdescriptor is added afterwards at the
 * entry saved in 'hdr_qentry'. After the loop the skb pointer is stored at
 * the last saved header entry, and the doorbell is rung once with the total
 * descriptor count.
1491 static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
1492 int sq_num, int qentry, struct sk_buff *skb)
1495 int seg_subdescs = 0, desc_cnt = 0;
1496 int seg_len, total_len, data_left;
1497 int hdr_qentry = qentry;
1498 int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
1500 tso_start(skb, &tso);
1501 total_len = skb->len - hdr_len;
1502 while (total_len > 0) {
1505 /* Save Qentry for adding HDR_SUBDESC at the end */
1506 hdr_qentry = qentry;
/* Payload bytes for this segment: gso_size, or whatever is left */
1508 data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
1509 total_len -= data_left;
1511 /* Add segment's header */
1512 qentry = nicvf_get_nxt_sqentry(sq, qentry);
/* Each SQ entry has its own TSO_HEADER_SIZE slot in sq->tso_hdrs */
1513 hdr = sq->tso_hdrs + qentry * TSO_HEADER_SIZE;
/* 'total_len == 0' flags the final segment to tso_build_hdr() */
1514 tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
1515 nicvf_sq_add_gather_subdesc(sq, qentry, hdr_len,
1517 qentry * TSO_HEADER_SIZE);
1518 /* HDR_SUBDESC + GATHER */
1522 /* Add segment's payload fragments */
1523 while (data_left > 0) {
/* Take as much as the current tso chunk offers, capped by the segment */
1526 size = min_t(int, tso.size, data_left);
1528 qentry = nicvf_get_nxt_sqentry(sq, qentry);
/* The payload piece is addressed by the physical address of tso.data */
1529 nicvf_sq_add_gather_subdesc(sq, qentry, size,
1530 virt_to_phys(tso.data));
1535 tso_build_data(skb, &tso, size);
/* Now that the segment's subdescriptor count is known, write its header */
1537 nicvf_sq_add_hdr_subdesc(nic, sq, hdr_qentry,
1538 seg_subdescs - 1, skb, seg_len);
/* No skb is recorded at this segment's header entry here */
1539 sq->skbuff[hdr_qentry] = (u64)NULL;
1540 qentry = nicvf_get_nxt_sqentry(sq, qentry);
1542 desc_cnt += seg_subdescs;
1544 /* Save SKB in the last segment for freeing */
1545 sq->skbuff[hdr_qentry] = (u64)skb;
1547 nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt);
/* Count this packet in the per-CPU tx_tso driver statistic */
1549 this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
1553 /* Append an skb to a SQ for packet transfer.
 *
 * Works out how many SQ descriptors the skb needs and obtains them with
 * nicvf_get_sq_desc(). It then writes a HEADER subdescriptor and one GATHER
 * subdescriptor per DMA-mapped buffer (the linear data first, then each
 * page fragment), and finally rings the doorbell. A GSO skb on a NIC
 * without HW TSO is handed to nicvf_sq_append_tso() instead.
 */
1554 int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
1555 struct sk_buff *skb, u8 sq_num)
1558 int subdesc_cnt, hdr_sqe = 0;
1562 subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
/* The packet needs more descriptors than are currently free */
1563 if (subdesc_cnt > atomic_read(&sq->free_cnt))
1566 qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
1568 /* Check if its a TSO packet */
1569 if (skb_shinfo(skb)->gso_size && !nic->hw_tso)
1570 return nicvf_sq_append_tso(nic, sq, sq_num, qentry, skb);
1572 /* Add SQ header subdesc */
1573 nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
1577 /* Add SQ gather subdescs */
1578 qentry = nicvf_get_nxt_sqentry(sq, qentry);
/* Linear part: head length for a nonlinear skb, else the whole packet */
1579 size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len;
1580 /* HW will ensure data coherency, CPU sync not required */
1581 dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data),
1582 offset_in_page(skb->data), size,
1583 DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
/* Mapping failed: hand the descriptors to nicvf_rollback_sq_desc() */
1584 if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
1585 nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
1589 nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
1591 /* Check for scattered buffer */
1592 if (!skb_is_nonlinear(skb))
/* Map every page fragment and add one GATHER subdescriptor for each */
1595 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1596 const struct skb_frag_struct *frag;
1598 frag = &skb_shinfo(skb)->frags[i];
1600 qentry = nicvf_get_nxt_sqentry(sq, qentry);
1601 size = skb_frag_size(frag);
1602 dma_addr = dma_map_page_attrs(&nic->pdev->dev,
1603 skb_frag_page(frag),
1604 frag->page_offset, size,
1606 DMA_ATTR_SKIP_CPU_SYNC);
1607 if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
1608 /* Free entire chain of mapped buffers
1609 * here 'i' = frags mapped + above mapped skb->data
1611 nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i);
1612 nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
1615 nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
1619 if (nic->t88 && skb_shinfo(skb)->gso_size) {
1620 qentry = nicvf_get_nxt_sqentry(sq, qentry);
1621 nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb);
1624 nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt);
1629 /* Use original PCI dev for debug log */
1631 netdev_dbg(nic->netdev, "Not enough SQ descriptors to xmit pkt\n");
/* Translate a fragment index. The expression below keeps the base of each
 * group of four (i & ~3) and mirrors the position inside the group
 * (0 <-> 3, 1 <-> 2).
 */
1635 static inline unsigned frag_num(unsigned i)
1638 return (i & ~3) + 3 - (i & 3);
/* Release the DMA mapping of a receive buffer.
 * 'dma_addr' is the address passed to dma_unmap_page_attrs(); 'buf_addr' is
 * the buffer's physical address, used to look up its page. The default
 * unmap length is RCV_FRAG_LEN.
 * NOTE(review): the page refcount check and the XDP adjustments are
 * presumably applied only when 'xdp' is set -- confirm.
 */
1644 static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr,
1645 u64 buf_addr, bool xdp)
1647 struct page *page = NULL;
1648 int len = RCV_FRAG_LEN;
1651 page = virt_to_page(phys_to_virt(buf_addr));
1652 /* Check if it's a recycled page, if not
1653 * unmap the DMA mapping.
1655 * Recycled page holds an extra reference.
1657 if (page_ref_count(page) != 1)
1660 len += XDP_PACKET_HEADROOM;
1661 /* Receive buffers in XDP mode are mapped from page start */
1662 dma_addr &= PAGE_MASK;
/* Unmap without a CPU sync (DMA_ATTR_SKIP_CPU_SYNC) */
1664 dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, len,
1665 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
1668 /* Returns SKB for a received packet.
 *
 * Walks the cqe_rx->rb_cnt receive buffers referenced by the CQE. The first
 * buffer is turned into the skb via nicvf_rb_ptr_to_skb(); every further
 * buffer is attached as a page fragment with skb_add_rx_frag(). Each buffer
 * is passed to nicvf_unmap_rcv_buffer() before it is used.
 */
1669 struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic,
1670 struct cqe_rx_t *cqe_rx, bool xdp)
1673 int payload_len = 0;
1674 struct sk_buff *skb = NULL;
1677 u16 *rb_lens = NULL;
1678 u64 *rb_ptrs = NULL;
/* Buffer lengths start at word 3 of the CQE */
1681 rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
1682 /* Except 88xx pass1 on all other chips CQE_RX2_S is added to
1683 * CQE_RX at word6, hence buffer pointers move by word
1685 * Use existing 'hw_tso' flag which will be set for all chips
1686 * except 88xx pass1 instead of an additional cache line
1687 * access (or miss) by using pci dev's revision.
1690 rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
1692 rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));
1694 for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
1695 payload_len = rb_lens[frag_num(frag)];
1696 phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs);
1699 dev_kfree_skb_any(skb);
1704 /* First fragment */
1705 nicvf_unmap_rcv_buffer(nic,
1706 *rb_ptrs - cqe_rx->align_pad,
1708 skb = nicvf_rb_ptr_to_skb(nic,
1709 phys_addr - cqe_rx->align_pad,
/* Skip the alignment padding, then account the payload bytes */
1713 skb_reserve(skb, cqe_rx->align_pad);
1714 skb_put(skb, payload_len);
1717 nicvf_unmap_rcv_buffer(nic, *rb_ptrs, phys_addr, xdp);
/* Locate the page and the in-page offset of this buffer */
1718 page = virt_to_page(phys_to_virt(phys_addr));
1719 offset = phys_to_virt(phys_addr) - page_address(page);
1720 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
1721 offset, payload_len, RCV_FRAG_LEN);
1723 /* Next buffer pointer */
/* Translate an interrupt type (and queue index) into its bit mask.
 * The CQ, SQ and RBDR masks select the bit for 'q_idx' relative to the
 * type's shift; the remaining types map to one fixed bit each.
 */
1729 static u64 nicvf_int_type_to_mask(int int_type, int q_idx)
/* Per-queue interrupts: one bit per queue index */
1735 reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
1738 reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
1740 case NICVF_INTR_RBDR:
1741 reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
/* Interrupts with a single bit; 'q_idx' is not used for these */
1743 case NICVF_INTR_PKT_DROP:
1744 reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT);
1746 case NICVF_INTR_TCP_TIMER:
1747 reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT);
1749 case NICVF_INTR_MBOX:
1750 reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT);
1752 case NICVF_INTR_QS_ERR:
1753 reg_val = (1ULL << NICVF_INTR_QS_ERR_SHIFT);
1762 /* Enable interrupt.
 *
 * The mask for (int_type, q_idx) is OR-ed with the current value of
 * NIC_VF_ENA_W1S and written back to that register. The debug message
 * below reports an interrupt type that could not be translated.
 */
1763 void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx)
1765 u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
1768 netdev_dbg(nic->netdev,
1769 "Failed to enable interrupt: unknown type\n");
1772 nicvf_reg_write(nic, NIC_VF_ENA_W1S,
1773 nicvf_reg_read(nic, NIC_VF_ENA_W1S) | mask);
1776 /* Disable interrupt.
 *
 * Writes the mask for (int_type, q_idx) to NIC_VF_ENA_W1C. The debug
 * message below reports an interrupt type that could not be translated.
 */
1777 void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx)
1779 u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
1782 netdev_dbg(nic->netdev,
1783 "Failed to disable interrupt: unknown type\n");
1787 nicvf_reg_write(nic, NIC_VF_ENA_W1C, mask);
1790 /* Clear interrupt.
 *
 * Writes the mask for (int_type, q_idx) to NIC_VF_INT. The debug message
 * below reports an interrupt type that could not be translated.
 */
1791 void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx)
1793 u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
1796 netdev_dbg(nic->netdev,
1797 "Failed to clear interrupt: unknown type\n");
1801 nicvf_reg_write(nic, NIC_VF_INT, mask);
1804 /* Check if interrupt is enabled.
 *
 * Returns the bits of the (int_type, q_idx) mask that are set in
 * NIC_VF_ENA_W1S, i.e. a non-zero value when the interrupt is enabled.
 */
1805 int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx)
1807 u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
1808 /* If interrupt type is unknown, we treat it disabled. */
1810 netdev_dbg(nic->netdev,
1811 "Failed to check interrupt enable: unknown type\n");
1815 return mask & nicvf_reg_read(nic, NIC_VF_ENA_W1S);
/* Read the byte and packet counters of receive queue 'rq_idx' from the
 * NIC_QSET_RQ_0_7_STAT_0_1 registers into that queue's 'stats'.
 */
1818 void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx)
1820 struct rcv_queue *rq;
/* Read statistic 'reg' of this RQ: the queue index and the statistic
 * index are both encoded in the register offset.
 */
1822 #define GET_RQ_STATS(reg) \
1823 nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\
1824 (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
1826 rq = &nic->qs->rq[rq_idx];
1827 rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS);
1828 rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS);
/* Read the byte and packet counters of send queue 'sq_idx' from the
 * NIC_QSET_SQ_0_7_STAT_0_1 registers into that queue's 'stats'.
 */
1831 void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
1833 struct snd_queue *sq;
/* Read statistic 'reg' of this SQ: the queue index and the statistic
 * index are both encoded in the register offset.
 */
1835 #define GET_SQ_STATS(reg) \
1836 nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\
1837 (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
1839 sq = &nic->qs->sq[sq_idx];
1840 sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS);
1841 sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS);
1844 /* Check for errors in the receive cmp.queue entry.
 *
 * Logs the CQE's err_level and err_opcode, then increments the per-CPU
 * driver statistic that corresponds to err_opcode.
 */
1845 int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
1847 netif_err(nic, rx_err, nic->netdev,
1848 "RX error CQE err_level 0x%x err_opcode 0x%x\n",
1849 cqe_rx->err_level, cqe_rx->err_opcode);
1851 switch (cqe_rx->err_opcode) {
/* RE_* opcodes */
1852 case CQ_RX_ERROP_RE_PARTIAL:
1853 this_cpu_inc(nic->drv_stats->rx_bgx_truncated_pkts);
1855 case CQ_RX_ERROP_RE_JABBER:
1856 this_cpu_inc(nic->drv_stats->rx_jabber_errs);
1858 case CQ_RX_ERROP_RE_FCS:
1859 this_cpu_inc(nic->drv_stats->rx_fcs_errs);
1861 case CQ_RX_ERROP_RE_RX_CTL:
1862 this_cpu_inc(nic->drv_stats->rx_bgx_errs);
/* PREL2 and L2 opcodes */
1864 case CQ_RX_ERROP_PREL2_ERR:
1865 this_cpu_inc(nic->drv_stats->rx_prel2_errs);
1867 case CQ_RX_ERROP_L2_MAL:
1868 this_cpu_inc(nic->drv_stats->rx_l2_hdr_malformed);
1870 case CQ_RX_ERROP_L2_OVERSIZE:
1871 this_cpu_inc(nic->drv_stats->rx_oversize);
1873 case CQ_RX_ERROP_L2_UNDERSIZE:
1874 this_cpu_inc(nic->drv_stats->rx_undersize);
1876 case CQ_RX_ERROP_L2_LENMISM:
1877 this_cpu_inc(nic->drv_stats->rx_l2_len_mismatch);
1879 case CQ_RX_ERROP_L2_PCLP:
1880 this_cpu_inc(nic->drv_stats->rx_l2_pclp);
/* IP and L3 opcodes */
1882 case CQ_RX_ERROP_IP_NOT:
1883 this_cpu_inc(nic->drv_stats->rx_ip_ver_errs);
1885 case CQ_RX_ERROP_IP_CSUM_ERR:
1886 this_cpu_inc(nic->drv_stats->rx_ip_csum_errs);
1888 case CQ_RX_ERROP_IP_MAL:
1889 this_cpu_inc(nic->drv_stats->rx_ip_hdr_malformed);
1891 case CQ_RX_ERROP_IP_MALD:
1892 this_cpu_inc(nic->drv_stats->rx_ip_payload_malformed);
1894 case CQ_RX_ERROP_IP_HOP:
1895 this_cpu_inc(nic->drv_stats->rx_ip_ttl_errs);
1897 case CQ_RX_ERROP_L3_PCLP:
1898 this_cpu_inc(nic->drv_stats->rx_l3_pclp);
/* L4, UDP and TCP opcodes */
1900 case CQ_RX_ERROP_L4_MAL:
1901 this_cpu_inc(nic->drv_stats->rx_l4_malformed);
1903 case CQ_RX_ERROP_L4_CHK:
1904 this_cpu_inc(nic->drv_stats->rx_l4_csum_errs);
1906 case CQ_RX_ERROP_UDP_LEN:
1907 this_cpu_inc(nic->drv_stats->rx_udp_len_errs);
1909 case CQ_RX_ERROP_L4_PORT:
1910 this_cpu_inc(nic->drv_stats->rx_l4_port_errs);
1912 case CQ_RX_ERROP_TCP_FLAG:
1913 this_cpu_inc(nic->drv_stats->rx_tcp_flag_errs);
1915 case CQ_RX_ERROP_TCP_OFFSET:
1916 this_cpu_inc(nic->drv_stats->rx_tcp_offset_errs);
1918 case CQ_RX_ERROP_L4_PCLP:
1919 this_cpu_inc(nic->drv_stats->rx_l4_pclp);
/* RBDR_TRUNC opcode, counted as a truncated packet */
1921 case CQ_RX_ERROP_RBDR_TRUNC:
1922 this_cpu_inc(nic->drv_stats->rx_truncated_pkts);
1929 /* Check for errors in the send cmp.queue entry */
1930 int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx)
1932 switch (cqe_tx->send_status) {
1933 case CQ_TX_ERROP_DESC_FAULT:
1934 this_cpu_inc(nic->drv_stats->tx_desc_fault);
1936 case CQ_TX_ERROP_HDR_CONS_ERR:
1937 this_cpu_inc(nic->drv_stats->tx_hdr_cons_err);
1939 case CQ_TX_ERROP_SUBDC_ERR:
1940 this_cpu_inc(nic->drv_stats->tx_subdesc_err);
1942 case CQ_TX_ERROP_MAX_SIZE_VIOL:
1943 this_cpu_inc(nic->drv_stats->tx_max_size_exceeded);
1945 case CQ_TX_ERROP_IMM_SIZE_OFLOW:
1946 this_cpu_inc(nic->drv_stats->tx_imm_size_oflow);
1948 case CQ_TX_ERROP_DATA_SEQUENCE_ERR:
1949 this_cpu_inc(nic->drv_stats->tx_data_seq_err);
1951 case CQ_TX_ERROP_MEM_SEQUENCE_ERR:
1952 this_cpu_inc(nic->drv_stats->tx_mem_seq_err);
1954 case CQ_TX_ERROP_LOCK_VIOL:
1955 this_cpu_inc(nic->drv_stats->tx_lock_viol);
1957 case CQ_TX_ERROP_DATA_FAULT:
1958 this_cpu_inc(nic->drv_stats->tx_data_fault);
1960 case CQ_TX_ERROP_TSTMP_CONFLICT:
1961 this_cpu_inc(nic->drv_stats->tx_tstmp_conflict);
1963 case CQ_TX_ERROP_TSTMP_TIMEOUT:
1964 this_cpu_inc(nic->drv_stats->tx_tstmp_timeout);
1966 case CQ_TX_ERROP_MEM_FAULT:
1967 this_cpu_inc(nic->drv_stats->tx_mem_fault);
1969 case CQ_TX_ERROP_CK_OVERLAP:
1970 this_cpu_inc(nic->drv_stats->tx_csum_overlap);
1972 case CQ_TX_ERROP_CK_OFLOW:
1973 this_cpu_inc(nic->drv_stats->tx_csum_overflow);