/*******************************************************************************

  Intel 10 Gigabit PCI Express Linux driver
  Copyright(c) 1999 - 2013 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  Linux NICS <linux.nics@intel.com>
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#include "ixgbe.h"
#include "ixgbe_sriov.h"
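
/*
 * A note on the pool arithmetic used throughout this file: __ALIGN_MASK(x, y)
 * from <linux/kernel.h> is ((x + y) & ~y), so with y = ~vmdq->mask,
 * __ALIGN_MASK(1, ~vmdq->mask) evaluates to the number of hardware queues per
 * VMDq pool, __ALIGN_MASK(reg_idx, ~vmdq->mask) rounds reg_idx up to the start
 * of the next pool, and (reg_idx & ~vmdq->mask) is the queue offset within the
 * current pool.
 */
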
#ifdef CONFIG_IXGBE_DCB
/**
 * ixgbe_cache_ring_dcb_sriov - Descriptor ring to register mapping for SR-IOV
 * @adapter: board private structure to initialize
 *
 * Cache the descriptor ring offsets for SR-IOV to the assigned rings.  It
 * will also try to cache the proper offsets if RSS/FCoE are enabled along
 * with VMDq.
 *
 **/
static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter)
{
#ifdef IXGBE_FCOE
	struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE];
#endif /* IXGBE_FCOE */
	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
	int i;
	u16 reg_idx;
	u8 tcs = netdev_get_num_tc(adapter->netdev);

	/* verify we have DCB queueing enabled before proceeding */
	if (tcs <= 1)
		return false;

	/* verify we have VMDq enabled before proceeding */
	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
		return false;

	/* start at VMDq register offset for SR-IOV enabled setups */
	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
	for (i = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
		/* If we are past the TCs in this pool, move to the next pool */
		if ((reg_idx & ~vmdq->mask) >= tcs)
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
		adapter->rx_ring[i]->reg_idx = reg_idx;
	}

	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
	for (i = 0; i < adapter->num_tx_queues; i++, reg_idx++) {
		/* If we are past the TCs in this pool, move to the next pool */
		if ((reg_idx & ~vmdq->mask) >= tcs)
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
		adapter->tx_ring[i]->reg_idx = reg_idx;
	}

#ifdef IXGBE_FCOE
	/* nothing to do if FCoE is disabled */
	if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
		return true;

	/* The work is already done if the FCoE ring is shared */
	if (fcoe->offset < tcs)
		return true;

	/* The FCoE rings exist separately, we need to move their reg_idx */
	if (fcoe->indices) {
		u16 queues_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
		u8 fcoe_tc = ixgbe_fcoe_get_tc(adapter);

		reg_idx = (vmdq->offset + vmdq->indices) * queues_per_pool;
		for (i = fcoe->offset; i < adapter->num_rx_queues; i++) {
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask) + fcoe_tc;
			adapter->rx_ring[i]->reg_idx = reg_idx;
			reg_idx++;
		}

		reg_idx = (vmdq->offset + vmdq->indices) * queues_per_pool;
		for (i = fcoe->offset; i < adapter->num_tx_queues; i++) {
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask) + fcoe_tc;
			adapter->tx_ring[i]->reg_idx = reg_idx;
			reg_idx++;
		}
	}

#endif /* IXGBE_FCOE */
	return true;
}
/**
 * ixgbe_get_first_reg_idx - Return first register index associated with ring
 **/
static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc,
				    unsigned int *tx, unsigned int *rx)
{
	struct net_device *dev = adapter->netdev;
	struct ixgbe_hw *hw = &adapter->hw;
	u8 num_tcs = netdev_get_num_tc(dev);

	*tx = 0;
	*rx = 0;

	switch (hw->mac.type) {
	case ixgbe_mac_82598EB:
		/* TxQs/TC: 4	RxQs/TC: 8 */
		*tx = tc << 2; /* 0, 4,  8, 12, 16, 20, 24, 28 */
		*rx = tc << 3; /* 0, 8, 16, 24, 32, 40, 48, 56 */
		break;
	case ixgbe_mac_82599EB:
	case ixgbe_mac_X540:
	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_x:
		if (num_tcs > 4) {
			/*
			 * TCs    : TC0/1 TC2/3 TC4-7
			 * TxQs/TC:    32    16     8
			 * RxQs/TC:    16    16    16
			 */
			*rx = tc << 4;
			if (tc < 3)
				*tx = tc << 5;		/*   0,  32,  64 */
			else if (tc < 5)
				*tx = (tc + 2) << 4;	/*  80,  96 */
			else
				*tx = (tc + 8) << 3;	/* 104, 112, 120 */
		} else {
			/*
			 * TCs    : TC0 TC1 TC2/3
			 * TxQs/TC:  64  32    16
			 * RxQs/TC:  32  32    32
			 */
			*rx = tc << 5;
			if (tc < 2)
				*tx = tc << 6;		/*  0,  64 */
			else
				*tx = (tc + 4) << 4;	/* 96, 112 */
		}
		break;
	default:
		break;
	}
}
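
/*
 * Illustrative example of the mapping above: with 4 traffic classes on 82599,
 * TC1 starts at Tx register index 64 (1 << 6) and Rx register index 32
 * (1 << 5), so with 16 RSS queues per TC its rings occupy Tx registers 64-79
 * and Rx registers 32-47.
 */
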
/**
 * ixgbe_cache_ring_dcb - Descriptor ring to register mapping for DCB
 * @adapter: board private structure to initialize
 *
 * Cache the descriptor ring offsets for DCB to the assigned rings.
 *
 **/
static bool ixgbe_cache_ring_dcb(struct ixgbe_adapter *adapter)
{
	struct net_device *dev = adapter->netdev;
	unsigned int tx_idx, rx_idx;
	int tc, offset, rss_i, i;
	u8 num_tcs = netdev_get_num_tc(dev);

	/* verify we have DCB queueing enabled before proceeding */
	if (num_tcs <= 1)
		return false;

	rss_i = adapter->ring_feature[RING_F_RSS].indices;

	for (tc = 0, offset = 0; tc < num_tcs; tc++, offset += rss_i) {
		ixgbe_get_first_reg_idx(adapter, tc, &tx_idx, &rx_idx);
		for (i = 0; i < rss_i; i++, tx_idx++, rx_idx++) {
			adapter->tx_ring[offset + i]->reg_idx = tx_idx;
			adapter->rx_ring[offset + i]->reg_idx = rx_idx;
			adapter->tx_ring[offset + i]->dcb_tc = tc;
			adapter->rx_ring[offset + i]->dcb_tc = tc;
		}
	}

	return true;
}

#endif /* CONFIG_IXGBE_DCB */
/**
 * ixgbe_cache_ring_sriov - Descriptor ring to register mapping for SR-IOV
 * @adapter: board private structure to initialize
 *
 * SR-IOV doesn't use any descriptor rings but changes the default if
 * no other mapping is used.
 *
 */
static bool ixgbe_cache_ring_sriov(struct ixgbe_adapter *adapter)
{
#ifdef IXGBE_FCOE
	struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE];
#endif /* IXGBE_FCOE */
	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
	struct ixgbe_ring_feature *rss = &adapter->ring_feature[RING_F_RSS];
	int i;
	u16 reg_idx;

	/* only proceed if VMDq is enabled */
	if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED))
		return false;

	/* start at VMDq register offset for SR-IOV enabled setups */
	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
	for (i = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
#ifdef IXGBE_FCOE
		/* Allow first FCoE queue to be mapped as RSS */
		if (fcoe->offset && (i > fcoe->offset))
			break;
#endif
		/* If we are past the RSS indices, move to the next pool */
		if ((reg_idx & ~vmdq->mask) >= rss->indices)
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
		adapter->rx_ring[i]->reg_idx = reg_idx;
	}

#ifdef IXGBE_FCOE
	/* FCoE uses a linear block of queues, so just assign them 1:1 */
	for (; i < adapter->num_rx_queues; i++, reg_idx++)
		adapter->rx_ring[i]->reg_idx = reg_idx;

#endif
	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
	for (i = 0; i < adapter->num_tx_queues; i++, reg_idx++) {
#ifdef IXGBE_FCOE
		/* Allow first FCoE queue to be mapped as RSS */
		if (fcoe->offset && (i > fcoe->offset))
			break;
#endif
		/* If we are past the RSS indices, move to the next pool */
		if ((reg_idx & rss->mask) >= rss->indices)
			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
		adapter->tx_ring[i]->reg_idx = reg_idx;
	}

#ifdef IXGBE_FCOE
	/* FCoE uses a linear block of queues, so just assign them 1:1 */
	for (; i < adapter->num_tx_queues; i++, reg_idx++)
		adapter->tx_ring[i]->reg_idx = reg_idx;

#endif

	return true;
}
/**
 * ixgbe_cache_ring_rss - Descriptor ring to register mapping for RSS
 * @adapter: board private structure to initialize
 *
 * Cache the descriptor ring offsets for RSS to the assigned rings.
 *
 **/
static bool ixgbe_cache_ring_rss(struct ixgbe_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_rx_queues; i++)
		adapter->rx_ring[i]->reg_idx = i;
	for (i = 0; i < adapter->num_tx_queues; i++)
		adapter->tx_ring[i]->reg_idx = i;

	return true;
}
/**
 * ixgbe_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 *
 * Note: the order of the various feature calls is important.  It must start
 * with the "most" features enabled at the same time, then trickle down to the
 * least amount of features turned on at once.
 **/
static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
{
	/* start with default case */
	adapter->rx_ring[0]->reg_idx = 0;
	adapter->tx_ring[0]->reg_idx = 0;

#ifdef CONFIG_IXGBE_DCB
	if (ixgbe_cache_ring_dcb_sriov(adapter))
		return;

	if (ixgbe_cache_ring_dcb(adapter))
		return;

#endif
	if (ixgbe_cache_ring_sriov(adapter))
		return;

	ixgbe_cache_ring_rss(adapter);
}
#define IXGBE_RSS_16Q_MASK		0xF
#define IXGBE_RSS_8Q_MASK		0x7
#define IXGBE_RSS_4Q_MASK		0x3
#define IXGBE_RSS_2Q_MASK		0x1
#define IXGBE_RSS_DISABLED_MASK		0x0
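
/*
 * These masks bound the low bits of a ring's register index that select the
 * RSS queue within a traffic class or pool; the ixgbe_set_*_queues() routines
 * below store the chosen mask in ring_feature[RING_F_RSS].mask.
 */
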
#ifdef CONFIG_IXGBE_DCB
/**
 * ixgbe_set_dcb_sriov_queues - Allocate queues for SR-IOV devices w/ DCB
 * @adapter: board private structure to initialize
 *
 * When SR-IOV (Single Root IO Virtualization) is enabled, allocate queues
 * and VM pools where appropriate.  Also assign queues based on DCB
 * priorities and map accordingly.
 *
 **/
static bool ixgbe_set_dcb_sriov_queues(struct ixgbe_adapter *adapter)
{
	int i;
	u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit;
	u16 vmdq_m = 0;
#ifdef IXGBE_FCOE
	u16 fcoe_i = 0;
#endif
	u8 tcs = netdev_get_num_tc(adapter->netdev);

	/* verify we have DCB queueing enabled before proceeding */
	if (tcs <= 1)
		return false;

	/* verify we have VMDq enabled before proceeding */
	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
		return false;

	/* Add starting offset to total pool count */
	vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset;

	/* 16 pools w/ 8 TC per pool */
	if (tcs > 4) {
		vmdq_i = min_t(u16, vmdq_i, 16);
		vmdq_m = IXGBE_82599_VMDQ_8Q_MASK;
	/* 32 pools w/ 4 TC per pool */
	} else {
		vmdq_i = min_t(u16, vmdq_i, 32);
		vmdq_m = IXGBE_82599_VMDQ_4Q_MASK;
	}

#ifdef IXGBE_FCOE
	/* queues in the remaining pools are available for FCoE */
	fcoe_i = (128 / __ALIGN_MASK(1, ~vmdq_m)) - vmdq_i;
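
	/*
	 * Illustrative sizing: with more than 4 TCs the device supports at
	 * most 16 pools of 8 queues (16 * 8 = 128); with 4 or fewer TCs, 32
	 * pools of 4 queues.  fcoe_i is therefore the number of leftover
	 * pools, each able to host one FCoE ring at the FCoE TC offset (see
	 * ixgbe_cache_ring_dcb_sriov() above).
	 */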

#endif
	/* remove the starting offset from the pool count */
	vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset;

	/* save features for later use */
	adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i;
	adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;

	/*
	 * We do not support DCB, VMDq, and RSS all simultaneously,
	 * so we will disable RSS since it is the lowest priority
	 */
	adapter->ring_feature[RING_F_RSS].indices = 1;
	adapter->ring_feature[RING_F_RSS].mask = IXGBE_RSS_DISABLED_MASK;

	/* disable ATR as it is not supported when VMDq is enabled */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

	adapter->num_rx_pools = vmdq_i;
	adapter->num_rx_queues_per_pool = tcs;

	adapter->num_tx_queues = vmdq_i * tcs;
	adapter->num_rx_queues = vmdq_i * tcs;

#ifdef IXGBE_FCOE
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		struct ixgbe_ring_feature *fcoe;

		fcoe = &adapter->ring_feature[RING_F_FCOE];

		/* limit ourselves based on feature limits */
		fcoe_i = min_t(u16, fcoe_i, fcoe->limit);

		if (fcoe_i) {
			/* alloc queues for FCoE separately */
			fcoe->indices = fcoe_i;
			fcoe->offset = vmdq_i * tcs;

			/* add queues to adapter */
			adapter->num_tx_queues += fcoe_i;
			adapter->num_rx_queues += fcoe_i;
		} else if (tcs > 1) {
			/* use queue belonging to FCoE TC */
			fcoe->indices = 1;
			fcoe->offset = ixgbe_fcoe_get_tc(adapter);
		} else {
			adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;

			fcoe->indices = 0;
			fcoe->offset = 0;
		}
	}

#endif /* IXGBE_FCOE */
	/* configure TC to queue mapping */
	for (i = 0; i < tcs; i++)
		netdev_set_tc_queue(adapter->netdev, i, 1, i);

	return true;
}
static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)
{
	struct net_device *dev = adapter->netdev;
	struct ixgbe_ring_feature *f;
	int rss_i, rss_m, i;
	int tcs;

	/* Map queue offset and counts onto allocated tx queues */
	tcs = netdev_get_num_tc(dev);

	/* verify we have DCB queueing enabled before proceeding */
	if (tcs <= 1)
		return false;

	/* determine the upper limit for our current DCB mode */
	rss_i = dev->num_tx_queues / tcs;
	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
		/* 8 TC w/ 4 queues per TC */
		rss_i = min_t(u16, rss_i, 4);
		rss_m = IXGBE_RSS_4Q_MASK;
	} else if (tcs > 4) {
		/* 8 TC w/ 8 queues per TC */
		rss_i = min_t(u16, rss_i, 8);
		rss_m = IXGBE_RSS_8Q_MASK;
	} else {
		/* 4 TC w/ 16 queues per TC */
		rss_i = min_t(u16, rss_i, 16);
		rss_m = IXGBE_RSS_16Q_MASK;
	}

	/* set RSS mask and indices */
	f = &adapter->ring_feature[RING_F_RSS];
	rss_i = min_t(int, rss_i, f->limit);
	f->indices = rss_i;
	f->mask = rss_m;

	/* disable ATR as it is not supported when multiple TCs are enabled */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

#ifdef IXGBE_FCOE
	/* FCoE enabled queues require special configuration indexed
	 * by feature specific indices and offset. Here we map FCoE
	 * indices onto the DCB queue pairs allowing FCoE to own
	 * configuration later.
	 */
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		u8 tc = ixgbe_fcoe_get_tc(adapter);

		f = &adapter->ring_feature[RING_F_FCOE];
		f->indices = min_t(u16, rss_i, f->limit);
		f->offset = rss_i * tc;
	}

#endif /* IXGBE_FCOE */
	for (i = 0; i < tcs; i++)
		netdev_set_tc_queue(dev, i, rss_i, rss_i * i);

	adapter->num_tx_queues = rss_i * tcs;
	adapter->num_rx_queues = rss_i * tcs;

	return true;
}
/**
 * ixgbe_set_sriov_queues - Allocate queues for SR-IOV devices
 * @adapter: board private structure to initialize
 *
 * When SR-IOV (Single Root IO Virtualization) is enabled, allocate queues
 * and VM pools where appropriate.  If RSS is available, then also try and
 * enable RSS and map accordingly.
 *
 **/
static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter)
{
	u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit;
	u16 vmdq_m = 0;
	u16 rss_i = adapter->ring_feature[RING_F_RSS].limit;
	u16 rss_m = IXGBE_RSS_DISABLED_MASK;
#ifdef IXGBE_FCOE
	u16 fcoe_i = 0;
#endif
	bool pools = (find_first_zero_bit(&adapter->fwd_bitmask, 32) > 1);

	/* only proceed if SR-IOV is enabled */
	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
		return false;

	/* Add starting offset to total pool count */
	vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset;

	/* double check we are limited to maximum pools */
	vmdq_i = min_t(u16, IXGBE_MAX_VMDQ_INDICES, vmdq_i);

	/* 64 pool mode with 2 queues per pool */
	if ((vmdq_i > 32) || (rss_i < 4) || (vmdq_i > 16 && pools)) {
		vmdq_m = IXGBE_82599_VMDQ_2Q_MASK;
		rss_m = IXGBE_RSS_2Q_MASK;
		rss_i = min_t(u16, rss_i, 2);
	/* 32 pool mode with 4 queues per pool */
	} else {
		vmdq_m = IXGBE_82599_VMDQ_4Q_MASK;
		rss_m = IXGBE_RSS_4Q_MASK;
		rss_i = 4;
	}

#ifdef IXGBE_FCOE
	/* queues in the remaining pools are available for FCoE */
	fcoe_i = 128 - (vmdq_i * __ALIGN_MASK(1, ~vmdq_m));
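
	/*
	 * Illustrative: in 64 pool mode each pool owns 2 hardware queues
	 * (64 * 2 = 128) and in 32 pool mode each owns 4 (32 * 4 = 128), so
	 * fcoe_i above is simply the count of hardware queues left over once
	 * the SR-IOV pools have been carved out.
	 */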

#endif
	/* remove the starting offset from the pool count */
	vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset;

	/* save features for later use */
	adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i;
	adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;

	/* limit RSS based on user input and save for later use */
	adapter->ring_feature[RING_F_RSS].indices = rss_i;
	adapter->ring_feature[RING_F_RSS].mask = rss_m;

	adapter->num_rx_pools = vmdq_i;
	adapter->num_rx_queues_per_pool = rss_i;

	adapter->num_rx_queues = vmdq_i * rss_i;
	adapter->num_tx_queues = vmdq_i * rss_i;

	/* disable ATR as it is not supported when VMDq is enabled */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

#ifdef IXGBE_FCOE
	/*
	 * FCoE can use rings from adjacent buffers to allow RSS
	 * like behavior.  To account for this we need to add the
	 * FCoE indices to the total ring count.
	 */
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		struct ixgbe_ring_feature *fcoe;

		fcoe = &adapter->ring_feature[RING_F_FCOE];

		/* limit ourselves based on feature limits */
		fcoe_i = min_t(u16, fcoe_i, fcoe->limit);

		if (vmdq_i > 1 && fcoe_i) {
			/* alloc queues for FCoE separately */
			fcoe->indices = fcoe_i;
			fcoe->offset = vmdq_i * rss_i;
		} else {
			/* merge FCoE queues with RSS queues */
			fcoe_i = min_t(u16, fcoe_i + rss_i, num_online_cpus());

			/* limit indices to rss_i if MSI-X is disabled */
			if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
				fcoe_i = rss_i;

			/* attempt to reserve some queues for just FCoE */
			fcoe->indices = min_t(u16, fcoe_i, fcoe->limit);
			fcoe->offset = fcoe_i - fcoe->indices;

			fcoe_i -= rss_i;
		}

		/* add queues to adapter */
		adapter->num_tx_queues += fcoe_i;
		adapter->num_rx_queues += fcoe_i;
	}

#endif
	return true;
}
/**
 * ixgbe_set_rss_queues - Allocate queues for RSS
 * @adapter: board private structure to initialize
 *
 * This is our "base" multiqueue mode.  RSS (Receive Side Scaling) will try
 * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
 *
 **/
static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
{
	struct ixgbe_ring_feature *f;
	u16 rss_i;

	/* set mask for 16 queue limit of RSS */
	f = &adapter->ring_feature[RING_F_RSS];
	rss_i = f->limit;

	f->indices = rss_i;
	f->mask = IXGBE_RSS_16Q_MASK;

	/* disable ATR by default, it will be configured below */
	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;

	/*
	 * Use Flow Director in addition to RSS to ensure the best
	 * distribution of flows across cores, even when an FDIR flow
	 * isn't matched.
	 */
	if (rss_i > 1 && adapter->atr_sample_rate) {
		f = &adapter->ring_feature[RING_F_FDIR];

		rss_i = f->indices = f->limit;

		if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
			adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
	}

#ifdef IXGBE_FCOE
	/*
	 * FCoE can exist on the same rings as standard network traffic,
	 * however it is preferred to avoid that if possible.  In order
	 * to get the best performance we allocate as many FCoE queues
	 * as we can and we place them at the end of the ring array to
	 * avoid sharing queues with standard RSS on systems with 24 or
	 * more CPUs.
	 */
	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
		struct net_device *dev = adapter->netdev;
		u16 fcoe_i;

		f = &adapter->ring_feature[RING_F_FCOE];

		/* merge FCoE queues with RSS queues */
		fcoe_i = min_t(u16, f->limit + rss_i, num_online_cpus());
		fcoe_i = min_t(u16, fcoe_i, dev->num_tx_queues);

		/* limit indices to rss_i if MSI-X is disabled */
		if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
			fcoe_i = rss_i;

		/* attempt to reserve some queues for just FCoE */
		f->indices = min_t(u16, fcoe_i, f->limit);
		f->offset = fcoe_i - f->indices;
		rss_i = max_t(u16, fcoe_i, rss_i);
	}

#endif /* IXGBE_FCOE */
	adapter->num_rx_queues = rss_i;
	adapter->num_tx_queues = rss_i;

	return true;
}
/**
 * ixgbe_set_num_queues - Allocate queues for device, feature dependent
 * @adapter: board private structure to initialize
 *
 * This is the top level queue allocation routine.  The order here is very
 * important, starting with the "most" number of features turned on at once,
 * and ending with the smallest set of features.  This way large combinations
 * can be allocated if they're turned on, and smaller combinations are the
 * fallthrough conditions.
 *
 **/
static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
{
	/* Start with base case */
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_rx_pools = adapter->num_rx_queues;
	adapter->num_rx_queues_per_pool = 1;

#ifdef CONFIG_IXGBE_DCB
	if (ixgbe_set_dcb_sriov_queues(adapter))
		return;

	if (ixgbe_set_dcb_queues(adapter))
		return;

#endif
	if (ixgbe_set_sriov_queues(adapter))
		return;

	ixgbe_set_rss_queues(adapter);
}
/**
 * ixgbe_acquire_msix_vectors - acquire MSI-X vectors
 * @adapter: board private structure
 *
 * Attempts to acquire a suitable range of MSI-X vector interrupts. Will
 * return a negative error code if unable to acquire MSI-X vectors for any
 * reason.
 */
static int ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;
	int i, vectors, vector_threshold;

	/* We start by asking for one vector per queue pair */
	vectors = max(adapter->num_rx_queues, adapter->num_tx_queues);

	/* It is easy to be greedy for MSI-X vectors. However, it really
	 * doesn't do much good if we have a lot more vectors than CPUs. We'll
	 * be somewhat conservative and only ask for (roughly) the same number
	 * of vectors as there are CPUs.
	 */
	vectors = min_t(int, vectors, num_online_cpus());

	/* Some vectors are necessary for non-queue interrupts */
	vectors += NON_Q_VECTORS;

	/* Hardware can only support a maximum of hw.mac->max_msix_vectors.
	 * With features such as RSS and VMDq, we can easily surpass the
	 * number of Rx and Tx descriptor queues supported by our device.
	 * Thus, we cap the maximum in the rare cases where the CPU count also
	 * exceeds our vector limit.
	 */
	vectors = min_t(int, vectors, hw->mac.max_msix_vectors);

	/* We want a minimum of two MSI-X vectors for (1) a TxQ[0] + RxQ[0]
	 * handler, and (2) an Other (Link Status Change, etc.) handler.
	 */
	vector_threshold = MIN_MSIX_COUNT;
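
	/*
	 * Illustrative budget: with 24 Rx and 24 Tx queues on a 16-CPU system
	 * the request above works out to 16 + NON_Q_VECTORS entries, further
	 * capped by hw->mac.max_msix_vectors; pci_enable_msix_range() below
	 * may then grant anything between MIN_MSIX_COUNT and that request.
	 */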

	adapter->msix_entries = kcalloc(vectors,
					sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		return -ENOMEM;

	for (i = 0; i < vectors; i++)
		adapter->msix_entries[i].entry = i;

	vectors = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
					vector_threshold, vectors);

	if (vectors < 0) {
		/* A negative count of allocated vectors indicates an error in
		 * acquiring within the specified range of MSI-X vectors.
		 */
		e_dev_warn("Failed to allocate MSI-X interrupts. Err: %d\n",
			   vectors);

		adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;

		return vectors;
	}

	/* we successfully allocated some number of vectors within our
	 * requested range.
	 */
	adapter->flags |= IXGBE_FLAG_MSIX_ENABLED;

	/* Adjust for only the vectors we'll use, which is the minimum of
	 * max_q_vectors and the number of vectors we were allocated.
	 */
	vectors -= NON_Q_VECTORS;
	adapter->num_q_vectors = min_t(int, vectors, adapter->max_q_vectors);

	return 0;
}
static void ixgbe_add_ring(struct ixgbe_ring *ring,
			   struct ixgbe_ring_container *head)
{
	ring->next = head->ring;
	head->ring = ring;
	head->count++;
}
/**
 * ixgbe_alloc_q_vector - Allocate memory for a single interrupt vector
 * @adapter: board private structure to initialize
 * @v_count: q_vectors allocated on adapter, used for ring interleaving
 * @v_idx: index of vector in adapter struct
 * @txr_count: total number of Tx rings to allocate
 * @txr_idx: index of first Tx ring to allocate
 * @rxr_count: total number of Rx rings to allocate
 * @rxr_idx: index of first Rx ring to allocate
 *
 * We allocate one q_vector.  If allocation fails we return -ENOMEM.
 **/
static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
				int v_count, int v_idx,
				int txr_count, int txr_idx,
				int rxr_count, int rxr_idx)
{
	struct ixgbe_q_vector *q_vector;
	struct ixgbe_ring *ring;
	int node = NUMA_NO_NODE;
	int cpu = -1;
	int ring_count, size;
	u8 tcs = netdev_get_num_tc(adapter->netdev);

	ring_count = txr_count + rxr_count;
	size = sizeof(struct ixgbe_q_vector) +
	       (sizeof(struct ixgbe_ring) * ring_count);

	/* customize cpu for Flow Director mapping */
	if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) {
		u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
		if (rss_i > 1 && adapter->atr_sample_rate) {
			if (cpu_online(v_idx)) {
				cpu = v_idx;
				node = cpu_to_node(cpu);
			}
		}
	}

	/* allocate q_vector and rings */
	q_vector = kzalloc_node(size, GFP_KERNEL, node);
	if (!q_vector)
		q_vector = kzalloc(size, GFP_KERNEL);
	if (!q_vector)
		return -ENOMEM;

	/* setup affinity mask and node */
	if (cpu != -1)
		cpumask_set_cpu(cpu, &q_vector->affinity_mask);
	q_vector->numa_node = node;

#ifdef CONFIG_IXGBE_DCA
	/* initialize CPU for DCA */
	q_vector->cpu = -1;

#endif
	/* initialize NAPI */
	netif_napi_add(adapter->netdev, &q_vector->napi,
		       ixgbe_poll, 64);
	napi_hash_add(&q_vector->napi);

#ifdef CONFIG_NET_RX_BUSY_POLL
	/* initialize busy poll */
	atomic_set(&q_vector->state, IXGBE_QV_STATE_DISABLE);

#endif
	/* tie q_vector and adapter together */
	adapter->q_vector[v_idx] = q_vector;
	q_vector->adapter = adapter;
	q_vector->v_idx = v_idx;

	/* initialize work limits */
	q_vector->tx.work_limit = adapter->tx_work_limit;

	/* initialize pointer to rings */
	ring = q_vector->ring;

	if (txr_count && !rxr_count) {
		/* tx only vector */
		if (adapter->tx_itr_setting == 1)
			q_vector->itr = IXGBE_10K_ITR;
		else
			q_vector->itr = adapter->tx_itr_setting;
	} else {
		/* rx or rx/tx vector */
		if (adapter->rx_itr_setting == 1)
			q_vector->itr = IXGBE_20K_ITR;
		else
			q_vector->itr = adapter->rx_itr_setting;
	}
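
	/*
	 * Ring indices are interleaved across q_vectors: the rings handled by
	 * this vector are txr_idx, txr_idx + v_count, txr_idx + 2 * v_count,
	 * and so on (likewise for Rx), spreading each vector's rings evenly
	 * through the adapter's ring arrays.
	 */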

	while (txr_count) {
		/* assign generic ring traits */
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Tx values */
		ixgbe_add_ring(ring, &q_vector->tx);

		/* apply Tx specific ring traits */
		ring->count = adapter->tx_ring_count;
		if (adapter->num_rx_pools > 1)
			ring->queue_index =
				txr_idx % adapter->num_rx_queues_per_pool;
		else
			ring->queue_index = txr_idx;

		/* assign ring to adapter */
		adapter->tx_ring[txr_idx] = ring;

		/* update count and index */
		txr_count--;
		txr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	while (rxr_count) {
		/* assign generic ring traits */
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Rx values */
		ixgbe_add_ring(ring, &q_vector->rx);

		/*
		 * Per the 82599 errata, UDP frames with a 0 checksum
		 * can be marked as checksum errors.
		 */
		if (adapter->hw.mac.type == ixgbe_mac_82599EB)
			set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state);

#ifdef IXGBE_FCOE
		if (adapter->netdev->features & NETIF_F_FCOE_MTU) {
			struct ixgbe_ring_feature *f;
			f = &adapter->ring_feature[RING_F_FCOE];
			if ((rxr_idx >= f->offset) &&
			    (rxr_idx < f->offset + f->indices))
				set_bit(__IXGBE_RX_FCOE, &ring->state);
		}

#endif /* IXGBE_FCOE */
		/* apply Rx specific ring traits */
		ring->count = adapter->rx_ring_count;
		if (adapter->num_rx_pools > 1)
			ring->queue_index =
				rxr_idx % adapter->num_rx_queues_per_pool;
		else
			ring->queue_index = rxr_idx;

		/* assign ring to adapter */
		adapter->rx_ring[rxr_idx] = ring;

		/* update count and index */
		rxr_count--;
		rxr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	return 0;
}
/**
 * ixgbe_free_q_vector - Free memory allocated for specific interrupt vector
 * @adapter: board private structure to initialize
 * @v_idx: Index of vector to be freed
 *
 * This function frees the memory allocated to the q_vector.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx)
{
	struct ixgbe_q_vector *q_vector = adapter->q_vector[v_idx];
	struct ixgbe_ring *ring;

	ixgbe_for_each_ring(ring, q_vector->tx)
		adapter->tx_ring[ring->queue_index] = NULL;

	ixgbe_for_each_ring(ring, q_vector->rx)
		adapter->rx_ring[ring->queue_index] = NULL;

	adapter->q_vector[v_idx] = NULL;
	napi_hash_del(&q_vector->napi);
	netif_napi_del(&q_vector->napi);

	/*
	 * ixgbe_get_stats64() might access the rings on this vector,
	 * so we must wait an RCU grace period before freeing it.
	 */
	kfree_rcu(q_vector, rcu);
}
/**
 * ixgbe_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
{
	int q_vectors = adapter->num_q_vectors;
	int rxr_remaining = adapter->num_rx_queues;
	int txr_remaining = adapter->num_tx_queues;
	int rxr_idx = 0, txr_idx = 0, v_idx = 0;
	int err;

	/* only one q_vector if MSI-X is disabled. */
	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
		q_vectors = 1;
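
	/*
	 * Ring-to-vector distribution: when we have at least one vector per
	 * ring, Rx rings are first given dedicated vectors below; otherwise
	 * the remaining Rx/Tx rings are packed onto the remaining vectors,
	 * DIV_ROUND_UP(remaining rings, remaining vectors) at a time, to keep
	 * the load even.
	 */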

	if (q_vectors >= (rxr_remaining + txr_remaining)) {
		for (; rxr_remaining; v_idx++) {
			err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
						   0, 0, 1, rxr_idx);

			if (err)
				goto err_out;

			/* update counts and index */
			rxr_remaining--;
			rxr_idx++;
		}
	}

	for (; v_idx < q_vectors; v_idx++) {
		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
		err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
					   tqpv, txr_idx,
					   rqpv, rxr_idx);

		if (err)
			goto err_out;

		/* update counts and index */
		rxr_remaining -= rqpv;
		txr_remaining -= tqpv;
		rxr_idx++;
		txr_idx++;
	}

	return 0;

err_out:
	adapter->num_tx_queues = 0;
	adapter->num_rx_queues = 0;
	adapter->num_q_vectors = 0;

	while (v_idx--)
		ixgbe_free_q_vector(adapter, v_idx);

	return -ENOMEM;
}
/**
 * ixgbe_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void ixgbe_free_q_vectors(struct ixgbe_adapter *adapter)
{
	int v_idx = adapter->num_q_vectors;

	adapter->num_tx_queues = 0;
	adapter->num_rx_queues = 0;
	adapter->num_q_vectors = 0;

	while (v_idx--)
		ixgbe_free_q_vector(adapter, v_idx);
}
static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
{
	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
		adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) {
		adapter->flags &= ~IXGBE_FLAG_MSI_ENABLED;
		pci_disable_msi(adapter->pdev);
	}
}
/**
 * ixgbe_set_interrupt_capability - set MSI-X or MSI if supported
 * @adapter: board private structure to initialize
 *
 * Attempt to configure the interrupts using the best available
 * capabilities of the hardware and the kernel.
 **/
static void ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
{
	int err;

	/* We will try to get MSI-X interrupts first */
	if (!ixgbe_acquire_msix_vectors(adapter))
		return;

	/* At this point, we do not have MSI-X capabilities. We need to
	 * reconfigure or disable various features which require MSI-X
	 * capability.
	 */

	/* Disable DCB unless we only have a single traffic class */
	if (netdev_get_num_tc(adapter->netdev) > 1) {
		e_dev_warn("Number of DCB TCs exceeds number of available queues. Disabling DCB support.\n");
		netdev_reset_tc(adapter->netdev);

		if (adapter->hw.mac.type == ixgbe_mac_82598EB)
			adapter->hw.fc.requested_mode = adapter->last_lfc_mode;

		adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
		adapter->temp_dcb_cfg.pfc_mode_enable = false;
		adapter->dcb_cfg.pfc_mode_enable = false;
	}

	adapter->dcb_cfg.num_tcs.pg_tcs = 1;
	adapter->dcb_cfg.num_tcs.pfc_tcs = 1;

	/* Disable SR-IOV support */
	e_dev_warn("Disabling SR-IOV support\n");
	ixgbe_disable_sriov(adapter);

	/* Disable RSS */
	e_dev_warn("Disabling RSS support\n");
	adapter->ring_feature[RING_F_RSS].limit = 1;

	/* recalculate number of queues now that many features have been
	 * changed or disabled.
	 */
	ixgbe_set_num_queues(adapter);
	adapter->num_q_vectors = 1;

	err = pci_enable_msi(adapter->pdev);
	if (err)
		e_dev_warn("Failed to allocate MSI interrupt, falling back to legacy. Error: %d\n",
			   err);
	else
		adapter->flags |= IXGBE_FLAG_MSI_ENABLED;
}
/**
 * ixgbe_init_interrupt_scheme - Determine proper interrupt scheme
 * @adapter: board private structure to initialize
 *
 * We determine which interrupt scheme to use based on...
 * - Kernel support (MSI, MSI-X)
 *   - which can be user-defined (via MODULE_PARAM)
 * - Hardware queue count (num_*_queues)
 *   - defined by miscellaneous hardware support/features (RSS, etc.)
 **/
int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
{
	int err;

	/* Number of supported queues */
	ixgbe_set_num_queues(adapter);

	/* Set interrupt mode */
	ixgbe_set_interrupt_capability(adapter);

	err = ixgbe_alloc_q_vectors(adapter);
	if (err) {
		e_dev_err("Unable to allocate memory for queue vectors\n");
		goto err_alloc_q_vectors;
	}

	ixgbe_cache_ring_register(adapter);

	e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
		   (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
		   adapter->num_rx_queues, adapter->num_tx_queues);

	set_bit(__IXGBE_DOWN, &adapter->state);

	return 0;

err_alloc_q_vectors:
	ixgbe_reset_interrupt_capability(adapter);

	return err;
}
/**
 * ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings
 * @adapter: board private structure to clear interrupt scheme on
 *
 * We go through and clear interrupt specific resources and reset the structure
 * to pre-load conditions.
 **/
void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter)
{
	adapter->num_tx_queues = 0;
	adapter->num_rx_queues = 0;

	ixgbe_free_q_vectors(adapter);
	ixgbe_reset_interrupt_capability(adapter);
}
void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens,
		       u32 fcoe_sof_eof, u32 type_tucmd, u32 mss_l4len_idx)
{
	struct ixgbe_adv_tx_context_desc *context_desc;
	u16 i = tx_ring->next_to_use;

	context_desc = IXGBE_TX_CTXTDESC(tx_ring, i);

	i++;
	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

	/* set bits to identify this as an advanced context descriptor */
	type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
	context_desc->seqnum_seed	= cpu_to_le32(fcoe_sof_eof);
	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
}