/*******************************************************************************

  Intel 10 Gigabit PCI Express Linux driver
  Copyright(c) 1999 - 2013 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#include "ixgbe.h"
#include "ixgbe_sriov.h"

#ifdef CONFIG_IXGBE_DCB
/**
 * ixgbe_cache_ring_dcb_sriov - Descriptor ring to register mapping for SR-IOV
 * @adapter: board private structure to initialize
 *
 * Cache the descriptor ring offsets for SR-IOV to the assigned rings.  It
 * will also try to cache the proper offsets if RSS/FCoE are enabled along
 * with VMDq.
 *
 **/
static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter)
{
#ifdef IXGBE_FCOE
        struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE];
#endif /* IXGBE_FCOE */
        struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
        int i;
        u16 reg_idx;
        u8 tcs = netdev_get_num_tc(adapter->netdev);

        /* verify we have DCB queueing enabled before proceeding */
        if (tcs <= 1)
                return false;

        /* verify we have VMDq enabled before proceeding */
        if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
                return false;

        /* start at VMDq register offset for SR-IOV enabled setups */
        reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
        for (i = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
                /* If we are greater than indices move to next pool */
                if ((reg_idx & ~vmdq->mask) >= tcs)
                        reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
                adapter->rx_ring[i]->reg_idx = reg_idx;
        }

        reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
        for (i = 0; i < adapter->num_tx_queues; i++, reg_idx++) {
                /* If we are greater than indices move to next pool */
                if ((reg_idx & ~vmdq->mask) >= tcs)
                        reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
                adapter->tx_ring[i]->reg_idx = reg_idx;
        }

#ifdef IXGBE_FCOE
        /* nothing to do if FCoE is disabled */
        if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
                return true;

        /* The work is already done if the FCoE ring is shared */
        if (fcoe->offset < tcs)
                return true;

        /* The FCoE rings exist separately, we need to move their reg_idx */
        if (fcoe->indices) {
                u16 queues_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
                u8 fcoe_tc = ixgbe_fcoe_get_tc(adapter);

                reg_idx = (vmdq->offset + vmdq->indices) * queues_per_pool;
                for (i = fcoe->offset; i < adapter->num_rx_queues; i++) {
                        reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask) + fcoe_tc;
                        adapter->rx_ring[i]->reg_idx = reg_idx;
                        reg_idx++;
                }

                reg_idx = (vmdq->offset + vmdq->indices) * queues_per_pool;
                for (i = fcoe->offset; i < adapter->num_tx_queues; i++) {
                        reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask) + fcoe_tc;
                        adapter->tx_ring[i]->reg_idx = reg_idx;
                        reg_idx++;
                }
        }

#endif /* IXGBE_FCOE */
        return true;
}
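
/* Worked example for the SR-IOV/DCB mapping above (illustrative values only,
 * assuming 8 queues per pool so that __ALIGN_MASK(1, ~vmdq->mask) == 8, with
 * vmdq->offset == 2 and tcs == 4; __ALIGN_MASK(x, m) is ((x) + (m)) & ~(m)
 * from include/linux/kernel.h): reg_idx starts at 2 * 8 == 16, rings 0-3 take
 * registers 16-19, then (20 & ~vmdq->mask) == 4 >= tcs, so the index rounds
 * up to 24 for ring 4; each pool thus contributes exactly tcs registers.
 */
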
/* ixgbe_get_first_reg_idx - Return first register index associated with ring */
static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc,
                                    unsigned int *tx, unsigned int *rx)
{
        struct net_device *dev = adapter->netdev;
        struct ixgbe_hw *hw = &adapter->hw;
        u8 num_tcs = netdev_get_num_tc(dev);

        *tx = 0;
        *rx = 0;

        switch (hw->mac.type) {
        case ixgbe_mac_82598EB:
                /* TxQs/TC: 4   RxQs/TC: 8 */
                *tx = tc << 2; /* 0, 4, 8, 12, 16, 20, 24, 28 */
                *rx = tc << 3; /* 0, 8, 16, 24, 32, 40, 48, 56 */
                break;
        case ixgbe_mac_82599EB:
        case ixgbe_mac_X540:
                if (num_tcs > 4) {
                        /* TCs: TC0/1 TC2/3 TC4-7; TxQs/TC: 32/16/8; RxQs/TC: 16 */
                        *rx = tc << 4;
                        if (tc < 3)
                                *tx = tc << 5;          /*   0,  32,  64 */
                        else if (tc < 5)
                                *tx = (tc + 2) << 4;    /*  80,  96 */
                        else
                                *tx = (tc + 8) << 3;    /* 104, 112, 120 */
                } else {
                        /* TCs: TC0 TC1 TC2/3; TxQs/TC: 64/32/16; RxQs/TC: 32 */
                        *rx = tc << 5;
                        if (tc < 2)
                                *tx = tc << 6;          /*  0,  64 */
                        else
                                *tx = (tc + 4) << 4;    /* 96, 112 */
                }
        default:
                break;
        }
}
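
/* Example of the 82598 mapping above (values taken from the comments in the
 * switch): for tc == 2 the first Tx register index is 2 << 2 == 8 and the
 * first Rx register index is 2 << 3 == 16; successive rings within the TC
 * then use consecutive registers starting from those bases.
 */
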
/**
 * ixgbe_cache_ring_dcb - Descriptor ring to register mapping for DCB
 * @adapter: board private structure to initialize
 *
 * Cache the descriptor ring offsets for DCB to the assigned rings.
 *
 **/
static bool ixgbe_cache_ring_dcb(struct ixgbe_adapter *adapter)
{
        struct net_device *dev = adapter->netdev;
        unsigned int tx_idx, rx_idx;
        int tc, offset, rss_i, i;
        u8 num_tcs = netdev_get_num_tc(dev);

        /* verify we have DCB queueing enabled before proceeding */
        if (num_tcs <= 1)
                return false;

        rss_i = adapter->ring_feature[RING_F_RSS].indices;

        for (tc = 0, offset = 0; tc < num_tcs; tc++, offset += rss_i) {
                ixgbe_get_first_reg_idx(adapter, tc, &tx_idx, &rx_idx);
                for (i = 0; i < rss_i; i++, tx_idx++, rx_idx++) {
                        adapter->tx_ring[offset + i]->reg_idx = tx_idx;
                        adapter->rx_ring[offset + i]->reg_idx = rx_idx;
                        adapter->tx_ring[offset + i]->dcb_tc = tc;
                        adapter->rx_ring[offset + i]->dcb_tc = tc;
                }
        }

        return true;
}
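
/* Worked example for the DCB mapping above (illustrative, 82598-style bases
 * with num_tcs == 4 and rss_i == 4): the ring at offset tc * 4 + i is given
 * tx reg_idx == tc * 4 + i and rx reg_idx == tc * 8 + i, e.g. tc == 1, i == 2
 * maps tx_ring[6] to Tx register 6 and rx_ring[6] to Rx register 10.
 */
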
/**
 * ixgbe_cache_ring_sriov - Descriptor ring to register mapping for sriov
 * @adapter: board private structure to initialize
 *
 * SR-IOV doesn't use any descriptor rings but changes the default if
 * no other mapping is used.
 *
 **/
static bool ixgbe_cache_ring_sriov(struct ixgbe_adapter *adapter)
{
#ifdef IXGBE_FCOE
        struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE];
#endif /* IXGBE_FCOE */
        struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
        struct ixgbe_ring_feature *rss = &adapter->ring_feature[RING_F_RSS];
        int i;
        u16 reg_idx;

        /* only proceed if VMDq is enabled */
        if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED))
                return false;

        /* start at VMDq register offset for SR-IOV enabled setups */
        reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
        for (i = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
#ifdef IXGBE_FCOE
                /* Allow first FCoE queue to be mapped as RSS */
                if (fcoe->offset && (i > fcoe->offset))
                        break;
#endif
                /* If we are greater than indices move to next pool */
                if ((reg_idx & ~vmdq->mask) >= rss->indices)
                        reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
                adapter->rx_ring[i]->reg_idx = reg_idx;
        }

#ifdef IXGBE_FCOE
        /* FCoE uses a linear block of queues so just assigning 1:1 */
        for (; i < adapter->num_rx_queues; i++, reg_idx++)
                adapter->rx_ring[i]->reg_idx = reg_idx;

#endif
        reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
        for (i = 0; i < adapter->num_tx_queues; i++, reg_idx++) {
#ifdef IXGBE_FCOE
                /* Allow first FCoE queue to be mapped as RSS */
                if (fcoe->offset && (i > fcoe->offset))
                        break;
#endif
                /* If we are greater than indices move to next pool */
                if ((reg_idx & rss->mask) >= rss->indices)
                        reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
                adapter->tx_ring[i]->reg_idx = reg_idx;
        }

#ifdef IXGBE_FCOE
        /* FCoE uses a linear block of queues so just assigning 1:1 */
        for (; i < adapter->num_tx_queues; i++, reg_idx++)
                adapter->tx_ring[i]->reg_idx = reg_idx;

#endif
        return true;
}
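
/* Worked example for the pool wrap above (illustrative, assuming 8 queues
 * per pool, vmdq->offset == 2 and rss->indices == 2): reg_idx starts at 16,
 * rings 0 and 1 take registers 16 and 17, then (18 & ~vmdq->mask) == 2 >=
 * rss->indices, so __ALIGN_MASK() rounds up to 24 for ring 2, and so on;
 * only the first rss->indices registers of each pool are ever used.
 */
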
/**
 * ixgbe_cache_ring_rss - Descriptor ring to register mapping for RSS
 * @adapter: board private structure to initialize
 *
 * Cache the descriptor ring offsets for RSS to the assigned rings.
 **/
static bool ixgbe_cache_ring_rss(struct ixgbe_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_rx_queues; i++)
                adapter->rx_ring[i]->reg_idx = i;
        for (i = 0; i < adapter->num_tx_queues; i++)
                adapter->tx_ring[i]->reg_idx = i;

        return true;
}

/**
 * ixgbe_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 *
 * Note, the order of the various feature calls is important.  It must start
 * with the "most" features enabled at the same time, then trickle down to the
 * least amount of features turned on at once.
 **/
static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
{
        /* start with default case */
        adapter->rx_ring[0]->reg_idx = 0;
        adapter->tx_ring[0]->reg_idx = 0;

#ifdef CONFIG_IXGBE_DCB
        if (ixgbe_cache_ring_dcb_sriov(adapter))
                return;

        if (ixgbe_cache_ring_dcb(adapter))
                return;

#endif
        if (ixgbe_cache_ring_sriov(adapter))
                return;

        ixgbe_cache_ring_rss(adapter);
}

#define IXGBE_RSS_16Q_MASK      0xF
#define IXGBE_RSS_8Q_MASK       0x7
#define IXGBE_RSS_4Q_MASK       0x3
#define IXGBE_RSS_2Q_MASK       0x1
#define IXGBE_RSS_DISABLED_MASK 0x0
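
/* Note on the masks above (illustrative interpretation): each RSS mask value
 * covers mask + 1 queue indices, so IXGBE_RSS_16Q_MASK describes a 16-queue
 * RSS allocation, IXGBE_RSS_2Q_MASK a 2-queue one, and
 * IXGBE_RSS_DISABLED_MASK pins everything to a single queue; these encode
 * the queue-count choices made in the ixgbe_set_*_queues() helpers below.
 */
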
#ifdef CONFIG_IXGBE_DCB
/**
 * ixgbe_set_dcb_sriov_queues: Allocate queues for SR-IOV devices w/ DCB
 * @adapter: board private structure to initialize
 *
 * When SR-IOV (Single Root IO Virtualization) is enabled, allocate queues
 * and VM pools where appropriate.  Also assign queues based on DCB
 * priorities and map accordingly.
 *
 **/
static bool ixgbe_set_dcb_sriov_queues(struct ixgbe_adapter *adapter)
{
        int i;
        u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit;
        u16 vmdq_m = 0;
#ifdef IXGBE_FCOE
        u16 fcoe_i = 0;
#endif
        u8 tcs = netdev_get_num_tc(adapter->netdev);

        /* verify we have DCB queueing enabled before proceeding */
        if (tcs <= 1)
                return false;

        /* verify we have VMDq enabled before proceeding */
        if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
                return false;

        /* Add starting offset to total pool count */
        vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset;

        /* 16 pools w/ 8 TC per pool */
        if (tcs > 4) {
                vmdq_i = min_t(u16, vmdq_i, 16);
                vmdq_m = IXGBE_82599_VMDQ_8Q_MASK;
        /* 32 pools w/ 4 TC per pool */
        } else {
                vmdq_i = min_t(u16, vmdq_i, 32);
                vmdq_m = IXGBE_82599_VMDQ_4Q_MASK;
        }

#ifdef IXGBE_FCOE
        /* queues in the remaining pools are available for FCoE */
        fcoe_i = (128 / __ALIGN_MASK(1, ~vmdq_m)) - vmdq_i;

#endif
        /* remove the starting offset from the pool count */
        vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset;

        /* save features for later use */
        adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i;
        adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;

        /*
         * We do not support DCB, VMDq, and RSS all simultaneously
         * so we will disable RSS since it is the lowest priority
         */
        adapter->ring_feature[RING_F_RSS].indices = 1;
        adapter->ring_feature[RING_F_RSS].mask = IXGBE_RSS_DISABLED_MASK;

        /* disable ATR as it is not supported when VMDq is enabled */
        adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

        adapter->num_rx_pools = vmdq_i;
        adapter->num_rx_queues_per_pool = tcs;

        adapter->num_tx_queues = vmdq_i * tcs;
        adapter->num_rx_queues = vmdq_i * tcs;

#ifdef IXGBE_FCOE
        if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
                struct ixgbe_ring_feature *fcoe;

                fcoe = &adapter->ring_feature[RING_F_FCOE];

                /* limit ourselves based on feature limits */
                fcoe_i = min_t(u16, fcoe_i, fcoe->limit);

                if (fcoe_i) {
                        /* alloc queues for FCoE separately */
                        fcoe->indices = fcoe_i;
                        fcoe->offset = vmdq_i * tcs;

                        /* add queues to adapter */
                        adapter->num_tx_queues += fcoe_i;
                        adapter->num_rx_queues += fcoe_i;
                } else if (tcs > 1) {
                        /* use queue belonging to FCoE TC */
                        fcoe->indices = 1;
                        fcoe->offset = ixgbe_fcoe_get_tc(adapter);
                } else {
                        adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;

                        fcoe->indices = 0;
                        fcoe->offset = 0;
                }
        }

#endif /* IXGBE_FCOE */
        /* configure TC to queue mapping */
        for (i = 0; i < tcs; i++)
                netdev_set_tc_queue(adapter->netdev, i, 1, i);

        return true;
}
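
/* Worked example for the sizing above (illustrative values): with tcs == 8,
 * a VMDq limit of 8 pools and no starting offset, vmdq_i == 8 with 8 queues
 * per pool, so fcoe_i == 128 / 8 - 8 == 8 spare queues remain for FCoE and
 * the base allocation is 8 pools * 8 TCs == 64 Rx and 64 Tx queues.
 */
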
static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)
{
        struct net_device *dev = adapter->netdev;
        struct ixgbe_ring_feature *f;
        int rss_i, rss_m, i;
        int tcs;

        /* Map queue offset and counts onto allocated tx queues */
        tcs = netdev_get_num_tc(dev);

        /* verify we have DCB queueing enabled before proceeding */
        if (tcs <= 1)
                return false;

        /* determine the upper limit for our current DCB mode */
        rss_i = dev->num_tx_queues / tcs;
        if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
                /* 8 TC w/ 4 queues per TC */
                rss_i = min_t(u16, rss_i, 4);
                rss_m = IXGBE_RSS_4Q_MASK;
        } else if (tcs > 4) {
                /* 8 TC w/ 8 queues per TC */
                rss_i = min_t(u16, rss_i, 8);
                rss_m = IXGBE_RSS_8Q_MASK;
        } else {
                /* 4 TC w/ 16 queues per TC */
                rss_i = min_t(u16, rss_i, 16);
                rss_m = IXGBE_RSS_16Q_MASK;
        }

        /* set RSS mask and indices */
        f = &adapter->ring_feature[RING_F_RSS];
        rss_i = min_t(int, rss_i, f->limit);
        f->indices = rss_i;
        f->mask = rss_m;

        /* disable ATR as it is not supported when multiple TCs are enabled */
        adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

#ifdef IXGBE_FCOE
        /* FCoE enabled queues require special configuration indexed
         * by feature specific indices and offset. Here we map FCoE
         * indices onto the DCB queue pairs allowing FCoE to own
         * configuration later.
         */
        if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
                u8 tc = ixgbe_fcoe_get_tc(adapter);

                f = &adapter->ring_feature[RING_F_FCOE];
                f->indices = min_t(u16, rss_i, f->limit);
                f->offset = rss_i * tc;
        }

#endif /* IXGBE_FCOE */
        for (i = 0; i < tcs; i++)
                netdev_set_tc_queue(dev, i, rss_i, rss_i * i);

        adapter->num_tx_queues = rss_i * tcs;
        adapter->num_rx_queues = rss_i * tcs;

        return true;
}
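
/* Worked example for the DCB limits above (illustrative): on an 82599-class
 * device with tcs == 4 and dev->num_tx_queues == 64, rss_i == 64 / 4 == 16
 * (capped by IXGBE_RSS_16Q_MASK), so each TC owns a contiguous block of 16
 * queues registered via netdev_set_tc_queue(dev, tc, 16, 16 * tc) and the
 * adapter ends up with 4 * 16 == 64 Tx and 64 Rx queues.
 */
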
/**
 * ixgbe_set_sriov_queues - Allocate queues for SR-IOV devices
 * @adapter: board private structure to initialize
 *
 * When SR-IOV (Single Root IO Virtualization) is enabled, allocate queues
 * and VM pools where appropriate.  If RSS is available, then also try and
 * enable RSS and map accordingly.
 *
 **/
static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter)
{
        u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit;
        u16 vmdq_m = 0;
        u16 rss_i = adapter->ring_feature[RING_F_RSS].limit;
        u16 rss_m = IXGBE_RSS_DISABLED_MASK;
#ifdef IXGBE_FCOE
        u16 fcoe_i = 0;
#endif
        bool pools = (find_first_zero_bit(&adapter->fwd_bitmask, 32) > 1);

        /* only proceed if SR-IOV is enabled */
        if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
                return false;

        /* Add starting offset to total pool count */
        vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset;

        /* double check we are limited to maximum pools */
        vmdq_i = min_t(u16, IXGBE_MAX_VMDQ_INDICES, vmdq_i);

        /* 64 pool mode with 2 queues per pool */
        if ((vmdq_i > 32) || (rss_i < 4) || (vmdq_i > 16 && pools)) {
                vmdq_m = IXGBE_82599_VMDQ_2Q_MASK;
                rss_m = IXGBE_RSS_2Q_MASK;
                rss_i = min_t(u16, rss_i, 2);
        /* 32 pool mode with 4 queues per pool */
        } else {
                vmdq_m = IXGBE_82599_VMDQ_4Q_MASK;
                rss_m = IXGBE_RSS_4Q_MASK;
                rss_i = 4;
        }

#ifdef IXGBE_FCOE
        /* queues in the remaining pools are available for FCoE */
        fcoe_i = 128 - (vmdq_i * __ALIGN_MASK(1, ~vmdq_m));

#endif
        /* remove the starting offset from the pool count */
        vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset;

        /* save features for later use */
        adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i;
        adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;

        /* limit RSS based on user input and save for later use */
        adapter->ring_feature[RING_F_RSS].indices = rss_i;
        adapter->ring_feature[RING_F_RSS].mask = rss_m;

        adapter->num_rx_pools = vmdq_i;
        adapter->num_rx_queues_per_pool = rss_i;

        adapter->num_rx_queues = vmdq_i * rss_i;
        adapter->num_tx_queues = vmdq_i * rss_i;

        /* disable ATR as it is not supported when VMDq is enabled */
        adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

#ifdef IXGBE_FCOE
        /*
         * FCoE can use rings from adjacent buffers to allow RSS
         * like behavior.  To account for this we need to add the
         * FCoE indices to the total ring count.
         */
        if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
                struct ixgbe_ring_feature *fcoe;

                fcoe = &adapter->ring_feature[RING_F_FCOE];

                /* limit ourselves based on feature limits */
                fcoe_i = min_t(u16, fcoe_i, fcoe->limit);

                if (vmdq_i > 1 && fcoe_i) {
                        /* alloc queues for FCoE separately */
                        fcoe->indices = fcoe_i;
                        fcoe->offset = vmdq_i * rss_i;
                } else {
                        /* merge FCoE queues with RSS queues */
                        fcoe_i = min_t(u16, fcoe_i + rss_i, num_online_cpus());

                        /* limit indices to rss_i if MSI-X is disabled */
                        if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
                                fcoe_i = rss_i;

                        /* attempt to reserve some queues for just FCoE */
                        fcoe->indices = min_t(u16, fcoe_i, fcoe->limit);
                        fcoe->offset = fcoe_i - fcoe->indices;

                        fcoe_i -= rss_i;
                }

                /* add queues to adapter */
                adapter->num_tx_queues += fcoe_i;
                adapter->num_rx_queues += fcoe_i;
        }

#endif
        return true;
}
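
/* Worked example for the pool selection above (illustrative): with an
 * effective vmdq_i of 40 pools the first branch is taken, so each pool gets
 * 2 queues (rss_i == 2) and fcoe_i == 128 - 40 * 2 == 48 queues remain for
 * FCoE; with 20 pools and rss_i >= 4 the 32-pool mode is used instead,
 * giving 4 queues per pool and fcoe_i == 128 - 20 * 4 == 48.
 */
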
/**
 * ixgbe_set_rss_queues - Allocate queues for RSS
 * @adapter: board private structure to initialize
 *
 * This is our "base" multiqueue mode.  RSS (Receive Side Scaling) will try
 * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
 *
 **/
static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
{
        struct ixgbe_ring_feature *f;
        u16 rss_i;

        /* set mask for 16 queue limit of RSS */
        f = &adapter->ring_feature[RING_F_RSS];
        rss_i = f->limit;

        f->indices = rss_i;
        f->mask = IXGBE_RSS_16Q_MASK;

        /* disable ATR by default, it will be configured below */
        adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

        /*
         * Use Flow Director in addition to RSS to ensure the best
         * distribution of flows across cores, even when an FDIR flow
         * isn't matched.
         */
        if (rss_i > 1 && adapter->atr_sample_rate) {
                f = &adapter->ring_feature[RING_F_FDIR];

                rss_i = f->indices = f->limit;

                if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
                        adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
        }

#ifdef IXGBE_FCOE
        /*
         * FCoE can exist on the same rings as standard network traffic
         * however it is preferred to avoid that if possible.  In order
         * to get the best performance we allocate as many FCoE queues
         * as we can and we place them at the end of the ring array to
         * avoid sharing queues with standard RSS on systems with 24 or
         * more CPUs.
         */
        if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
                struct net_device *dev = adapter->netdev;
                u16 fcoe_i;

                f = &adapter->ring_feature[RING_F_FCOE];

                /* merge FCoE queues with RSS queues */
                fcoe_i = min_t(u16, f->limit + rss_i, num_online_cpus());
                fcoe_i = min_t(u16, fcoe_i, dev->num_tx_queues);

                /* limit indices to rss_i if MSI-X is disabled */
                if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
                        fcoe_i = rss_i;

                /* attempt to reserve some queues for just FCoE */
                f->indices = min_t(u16, fcoe_i, f->limit);
                f->offset = fcoe_i - f->indices;
                rss_i = max_t(u16, fcoe_i, rss_i);
        }

#endif /* IXGBE_FCOE */
        adapter->num_rx_queues = rss_i;
        adapter->num_tx_queues = rss_i;

        return true;
}
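
/* Worked example for the FCoE reservation above (illustrative, assuming the
 * netdev exposes at least 16 Tx queues): with rss_i == 8, an FCoE limit of 8
 * and 16 online CPUs, fcoe_i becomes min(8 + 8, 16) == 16, so
 * f->indices == min(16, 8) == 8 and f->offset == 16 - 8 == 8; queues 8-15
 * are reserved for FCoE while rss_i grows to 16 total queues.
 */
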
/**
 * ixgbe_set_num_queues - Allocate queues for device, feature dependent
 * @adapter: board private structure to initialize
 *
 * This is the top level queue allocation routine.  The order here is very
 * important, starting with the "most" number of features turned on at once,
 * and ending with the smallest set of features.  This way large combinations
 * can be allocated if they're turned on, and smaller combinations are the
 * fallthrough conditions.
 *
 **/
static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
{
        /* Start with base case */
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        adapter->num_rx_pools = adapter->num_rx_queues;
        adapter->num_rx_queues_per_pool = 1;

#ifdef CONFIG_IXGBE_DCB
        if (ixgbe_set_dcb_sriov_queues(adapter))
                return;

        if (ixgbe_set_dcb_queues(adapter))
                return;

#endif
        if (ixgbe_set_sriov_queues(adapter))
                return;

        ixgbe_set_rss_queues(adapter);
}

static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter,
                                       int vectors)
{
        int vector_threshold;

        /* We'll want at least 2 (vector_threshold):
         * 1) TxQ[0] + RxQ[0] handler
         * 2) Other (Link Status Change, etc.)
         */
        vector_threshold = MIN_MSIX_COUNT;

        /*
         * The more we get, the more we will assign to Tx/Rx Cleanup
         * for the separate queues...where Rx Cleanup >= Tx Cleanup.
         * Right now, we simply care about how many we'll get; we'll
         * set them up later while requesting IRQs.
         */
        vectors = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
                                        vector_threshold, vectors);

        if (vectors < 0) {
                /* Can't allocate enough MSI-X interrupts?  Oh well.
                 * This just means we'll go with either a single MSI
                 * vector or fall back to legacy interrupts.
                 */
                netif_printk(adapter, hw, KERN_DEBUG, adapter->netdev,
                             "Unable to allocate MSI-X interrupts\n");
                adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
                kfree(adapter->msix_entries);
                adapter->msix_entries = NULL;
        } else {
                adapter->flags |= IXGBE_FLAG_MSIX_ENABLED; /* Woot! */
                /*
                 * Adjust for only the vectors we'll use, which is minimum
                 * of max_msix_q_vectors + NON_Q_VECTORS, or the number of
                 * vectors we were allocated.
                 */
                vectors -= NON_Q_VECTORS;
                adapter->num_q_vectors = min(vectors, adapter->max_q_vectors);
        }
}
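
/* Worked example for the vector accounting above (illustrative, assuming
 * NON_Q_VECTORS is 1 for the "other"/link-status vector): if
 * pci_enable_msix_range() grants 10 vectors, 10 - 1 == 9 remain for queue
 * cleanup and num_q_vectors becomes min(9, adapter->max_q_vectors).
 */
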
static void ixgbe_add_ring(struct ixgbe_ring *ring,
                           struct ixgbe_ring_container *head)
{
        ring->next = head->ring;
        head->ring = ring;
        head->count++;
}

/**
 * ixgbe_alloc_q_vector - Allocate memory for a single interrupt vector
 * @adapter: board private structure to initialize
 * @v_count: q_vectors allocated on adapter, used for ring interleaving
 * @v_idx: index of vector in adapter struct
 * @txr_count: total number of Tx rings to allocate
 * @txr_idx: index of first Tx ring to allocate
 * @rxr_count: total number of Rx rings to allocate
 * @rxr_idx: index of first Rx ring to allocate
 *
 * We allocate one q_vector.  If allocation fails we return -ENOMEM.
 **/
static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
                                int v_count, int v_idx,
                                int txr_count, int txr_idx,
                                int rxr_count, int rxr_idx)
{
        struct ixgbe_q_vector *q_vector;
        struct ixgbe_ring *ring;
        int node = NUMA_NO_NODE;
        int cpu = -1;
        int ring_count, size;
        u8 tcs = netdev_get_num_tc(adapter->netdev);

        ring_count = txr_count + rxr_count;
        size = sizeof(struct ixgbe_q_vector) +
               (sizeof(struct ixgbe_ring) * ring_count);

        /* customize cpu for Flow Director mapping */
        if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) {
                u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
                if (rss_i > 1 && adapter->atr_sample_rate) {
                        if (cpu_online(v_idx)) {
                                cpu = v_idx;
                                node = cpu_to_node(cpu);
                        }
                }
        }

        /* allocate q_vector and rings */
        q_vector = kzalloc_node(size, GFP_KERNEL, node);
        if (!q_vector)
                q_vector = kzalloc(size, GFP_KERNEL);
        if (!q_vector)
                return -ENOMEM;

        /* setup affinity mask and node */
        if (cpu != -1)
                cpumask_set_cpu(cpu, &q_vector->affinity_mask);
        q_vector->numa_node = node;

#ifdef CONFIG_IXGBE_DCA
        /* initialize CPU for DCA */
        q_vector->cpu = -1;

#endif
        /* initialize NAPI */
        netif_napi_add(adapter->netdev, &q_vector->napi,
                       ixgbe_poll, 64);
        napi_hash_add(&q_vector->napi);

        /* tie q_vector and adapter together */
        adapter->q_vector[v_idx] = q_vector;
        q_vector->adapter = adapter;
        q_vector->v_idx = v_idx;

        /* initialize work limits */
        q_vector->tx.work_limit = adapter->tx_work_limit;

        /* initialize pointer to rings */
        ring = q_vector->ring;

        /* initialize ITR */
        if (txr_count && !rxr_count) {
                /* tx only vector */
                if (adapter->tx_itr_setting == 1)
                        q_vector->itr = IXGBE_10K_ITR;
                else
                        q_vector->itr = adapter->tx_itr_setting;
        } else {
                /* rx or rx/tx vector */
                if (adapter->rx_itr_setting == 1)
                        q_vector->itr = IXGBE_20K_ITR;
                else
                        q_vector->itr = adapter->rx_itr_setting;
        }

        while (txr_count) {
                /* assign generic ring traits */
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;

                /* configure backlink on ring */
                ring->q_vector = q_vector;

                /* update q_vector Tx values */
                ixgbe_add_ring(ring, &q_vector->tx);

                /* apply Tx specific ring traits */
                ring->count = adapter->tx_ring_count;
                if (adapter->num_rx_pools > 1)
                        ring->queue_index =
                                txr_idx % adapter->num_rx_queues_per_pool;
                else
                        ring->queue_index = txr_idx;

                /* assign ring to adapter */
                adapter->tx_ring[txr_idx] = ring;

                /* update count and index */
                txr_count--;
                txr_idx += v_count;

                /* push pointer to next ring */
                ring++;
        }

        while (rxr_count) {
                /* assign generic ring traits */
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;

                /* configure backlink on ring */
                ring->q_vector = q_vector;

                /* update q_vector Rx values */
                ixgbe_add_ring(ring, &q_vector->rx);

                /*
                 * 82599 errata, UDP frames with a 0 checksum
                 * can be marked as checksum errors.
                 */
                if (adapter->hw.mac.type == ixgbe_mac_82599EB)
                        set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state);

#ifdef IXGBE_FCOE
                if (adapter->netdev->features & NETIF_F_FCOE_MTU) {
                        struct ixgbe_ring_feature *f;
                        f = &adapter->ring_feature[RING_F_FCOE];
                        if ((rxr_idx >= f->offset) &&
                            (rxr_idx < f->offset + f->indices))
                                set_bit(__IXGBE_RX_FCOE, &ring->state);
                }

#endif /* IXGBE_FCOE */
                /* apply Rx specific ring traits */
                ring->count = adapter->rx_ring_count;
                if (adapter->num_rx_pools > 1)
                        ring->queue_index =
                                rxr_idx % adapter->num_rx_queues_per_pool;
                else
                        ring->queue_index = rxr_idx;

                /* assign ring to adapter */
                adapter->rx_ring[rxr_idx] = ring;

                /* update count and index */
                rxr_count--;
                rxr_idx += v_count;

                /* push pointer to next ring */
                ring++;
        }

        return 0;
}
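
/* Worked example for the v_count interleaving above (illustrative): with 16
 * Tx rings spread over 8 q_vectors (two per vector), the vector created with
 * v_idx == 3 starts at txr_idx == 3 and then steps by v_count == 8, so it
 * owns Tx rings 3 and 11; Rx rings are interleaved the same way.
 */
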
/**
 * ixgbe_free_q_vector - Free memory allocated for specific interrupt vector
 * @adapter: board private structure to initialize
 * @v_idx: Index of vector to be freed
 *
 * This function frees the memory allocated to the q_vector.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx)
{
        struct ixgbe_q_vector *q_vector = adapter->q_vector[v_idx];
        struct ixgbe_ring *ring;

        ixgbe_for_each_ring(ring, q_vector->tx)
                adapter->tx_ring[ring->queue_index] = NULL;

        ixgbe_for_each_ring(ring, q_vector->rx)
                adapter->rx_ring[ring->queue_index] = NULL;

        adapter->q_vector[v_idx] = NULL;
        napi_hash_del(&q_vector->napi);
        netif_napi_del(&q_vector->napi);

        /*
         * ixgbe_get_stats64() might access the rings on this vector,
         * we must wait a grace period before freeing it.
         */
        kfree_rcu(q_vector, rcu);
}

/**
 * ixgbe_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
{
        int q_vectors = adapter->num_q_vectors;
        int rxr_remaining = adapter->num_rx_queues;
        int txr_remaining = adapter->num_tx_queues;
        int rxr_idx = 0, txr_idx = 0, v_idx = 0;
        int err;

        /* only one q_vector if MSI-X is disabled. */
        if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
                q_vectors = 1;

        if (q_vectors >= (rxr_remaining + txr_remaining)) {
                for (; rxr_remaining; v_idx++) {
                        err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
                                                   0, 0, 1, rxr_idx);
                        if (err)
                                goto err_out;

                        /* update counts and index */
                        rxr_remaining--;
                        rxr_idx++;
                }
        }

        for (; v_idx < q_vectors; v_idx++) {
                int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
                int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
                err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
                                           tqpv, txr_idx, rqpv, rxr_idx);
                if (err)
                        goto err_out;

                /* update counts and index */
                rxr_remaining -= rqpv;
                txr_remaining -= tqpv;
                rxr_idx++;
                txr_idx++;
        }

        return 0;

err_out:
        adapter->num_tx_queues = 0;
        adapter->num_rx_queues = 0;
        adapter->num_q_vectors = 0;

        while (v_idx--)
                ixgbe_free_q_vector(adapter, v_idx);

        return -ENOMEM;
}
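
/* Worked example for the DIV_ROUND_UP split above (illustrative): with 24 Rx
 * and 24 Tx queues shared by 16 q_vectors, vector 0 gets
 * DIV_ROUND_UP(24, 16) == 2 Rx and 2 Tx rings; after eight such vectors only
 * 8 + 8 queues remain for 8 vectors, so the rest get one of each.
 */
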
/**
 * ixgbe_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void ixgbe_free_q_vectors(struct ixgbe_adapter *adapter)
{
        int v_idx = adapter->num_q_vectors;

        adapter->num_tx_queues = 0;
        adapter->num_rx_queues = 0;
        adapter->num_q_vectors = 0;

        while (v_idx--)
                ixgbe_free_q_vector(adapter, v_idx);
}

static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
{
        if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
                adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
                pci_disable_msix(adapter->pdev);
                kfree(adapter->msix_entries);
                adapter->msix_entries = NULL;
        } else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) {
                adapter->flags &= ~IXGBE_FLAG_MSI_ENABLED;
                pci_disable_msi(adapter->pdev);
        }
}

/**
 * ixgbe_set_interrupt_capability - set MSI-X or MSI if supported
 * @adapter: board private structure to initialize
 *
 * Attempt to configure the interrupts using the best available
 * capabilities of the hardware and the kernel.
 **/
static void ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
{
        struct ixgbe_hw *hw = &adapter->hw;
        int vector, v_budget, err;

        /*
         * It's easy to be greedy for MSI-X vectors, but it really
         * doesn't do us much good if we have a lot more vectors
         * than CPUs.  So let's be conservative and only ask for
         * (roughly) the same number of vectors as there are CPUs.
         * The default is to use pairs of vectors.
         */
        v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
        v_budget = min_t(int, v_budget, num_online_cpus());
        v_budget += NON_Q_VECTORS;

        /*
         * At the same time, hardware can only support a maximum of
         * hw.mac->max_msix_vectors vectors.  With features
         * such as RSS and VMDq, we can easily surpass the number of Rx and Tx
         * descriptor queues supported by our device.  Thus, we cap it off in
         * those rare cases where the cpu count also exceeds our vector limit.
         */
        v_budget = min_t(int, v_budget, hw->mac.max_msix_vectors);

        /* A failure in MSI-X entry allocation isn't fatal, but it does
         * mean we disable MSI-X capabilities of the adapter. */
        adapter->msix_entries = kcalloc(v_budget,
                                        sizeof(struct msix_entry), GFP_KERNEL);
        if (adapter->msix_entries) {
                for (vector = 0; vector < v_budget; vector++)
                        adapter->msix_entries[vector].entry = vector;

                ixgbe_acquire_msix_vectors(adapter, v_budget);

                if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
                        return;
        }

        /* disable DCB if number of TCs exceeds 1 */
        if (netdev_get_num_tc(adapter->netdev) > 1) {
                e_err(probe, "num TCs exceeds number of queues - disabling DCB\n");
                netdev_reset_tc(adapter->netdev);

                if (adapter->hw.mac.type == ixgbe_mac_82598EB)
                        adapter->hw.fc.requested_mode = adapter->last_lfc_mode;

                adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
                adapter->temp_dcb_cfg.pfc_mode_enable = false;
                adapter->dcb_cfg.pfc_mode_enable = false;
        }
        adapter->dcb_cfg.num_tcs.pg_tcs = 1;
        adapter->dcb_cfg.num_tcs.pfc_tcs = 1;

        /* disable SR-IOV */
        ixgbe_disable_sriov(adapter);

        /* disable RSS */
        adapter->ring_feature[RING_F_RSS].limit = 1;

        ixgbe_set_num_queues(adapter);
        adapter->num_q_vectors = 1;

        err = pci_enable_msi(adapter->pdev);
        if (err) {
                netif_printk(adapter, hw, KERN_DEBUG, adapter->netdev,
                             "Unable to allocate MSI interrupt, falling back to legacy.  Error: %d\n",
                             err);
                return;
        }
        adapter->flags |= IXGBE_FLAG_MSI_ENABLED;
}
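
/* Worked example for the MSI-X budget above (illustrative, assuming
 * NON_Q_VECTORS is 1): a device with 16 Rx and 16 Tx queues on a 12-CPU
 * system asks for min(16, 12) + 1 == 13 vectors, further capped by
 * hw->mac.max_msix_vectors before the msix_entries table is allocated.
 */
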
/**
 * ixgbe_init_interrupt_scheme - Determine proper interrupt scheme
 * @adapter: board private structure to initialize
 *
 * We determine which interrupt scheme to use based on...
 * - Kernel support (MSI, MSI-X)
 *   - which can be user-defined (via MODULE_PARAM)
 * - Hardware queue count (num_*_queues)
 *   - defined by miscellaneous hardware support/features (RSS, etc.)
 **/
int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
{
        int err;

        /* Number of supported queues */
        ixgbe_set_num_queues(adapter);

        /* Set interrupt mode */
        ixgbe_set_interrupt_capability(adapter);

        err = ixgbe_alloc_q_vectors(adapter);
        if (err) {
                e_dev_err("Unable to allocate memory for queue vectors\n");
                goto err_alloc_q_vectors;
        }

        ixgbe_cache_ring_register(adapter);

        e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
                   (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
                   adapter->num_rx_queues, adapter->num_tx_queues);

        set_bit(__IXGBE_DOWN, &adapter->state);

        return 0;

err_alloc_q_vectors:
        ixgbe_reset_interrupt_capability(adapter);
        return err;
}

/**
 * ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings
 * @adapter: board private structure to clear interrupt scheme on
 *
 * We go through and clear interrupt specific resources and reset the structure
 * to pre-load conditions
 **/
void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter)
{
        adapter->num_tx_queues = 0;
        adapter->num_rx_queues = 0;

        ixgbe_free_q_vectors(adapter);
        ixgbe_reset_interrupt_capability(adapter);
}

void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens,
                       u32 fcoe_sof_eof, u32 type_tucmd, u32 mss_l4len_idx)
{
        struct ixgbe_adv_tx_context_desc *context_desc;
        u16 i = tx_ring->next_to_use;

        context_desc = IXGBE_TX_CTXTDESC(tx_ring, i);

        i++;
        tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

        /* set bits to identify this as an advanced context descriptor */
        type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

        context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
        context_desc->seqnum_seed       = cpu_to_le32(fcoe_sof_eof);
        context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
        context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
}
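
/* Note on the ring index handling above (illustrative): next_to_use wraps to
 * 0 once it reaches tx_ring->count, e.g. with a 512-entry ring and
 * next_to_use == 511 the context descriptor occupies slot 511 and
 * next_to_use becomes 0 for the following descriptor.
 */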