]> Git Repo - linux.git/blob - drivers/net/ethernet/ibm/ibmvnic.c
Merge tag 'irq-core-2025-01-21' into loongarch-next
[linux.git] / drivers / net / ethernet / ibm / ibmvnic.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /**************************************************************************/
3 /*                                                                        */
4 /*  IBM System i and System p Virtual NIC Device Driver                   */
5 /*  Copyright (C) 2014 IBM Corp.                                          */
6 /*  Santiago Leon ([email protected])                                  */
7 /*  Thomas Falcon ([email protected])                           */
8 /*  John Allen ([email protected])                                */
9 /*                                                                        */
10 /*                                                                        */
11 /* This module contains the implementation of a virtual ethernet device   */
12 /* for use with IBM i/p Series LPAR Linux. It utilizes the logical LAN    */
13 /* option of the RS/6000 Platform Architecture to interface with virtual  */
14 /* ethernet NICs that are presented to the partition by the hypervisor.   */
15 /*                                                                         */
16 /* Messages are passed between the VNIC driver and the VNIC server using  */
17 /* Command/Response Queues (CRQs) and sub CRQs (sCRQs). CRQs are used to  */
18 /* issue and receive commands that initiate communication with the server */
19 /* on driver initialization. Sub CRQs (sCRQs) are similar to CRQs, but    */
20 /* are used by the driver to notify the server that a packet is           */
21 /* ready for transmission or that a buffer has been added to receive a    */
22 /* packet. Subsequently, sCRQs are used by the server to notify the       */
23 /* driver that a packet transmission has been completed or that a packet  */
24 /* has been received and placed in a waiting buffer.                      */
25 /*                                                                        */
26 /* In lieu of a more conventional "on-the-fly" DMA mapping strategy in    */
27 /* which skbs are DMA mapped and immediately unmapped when the transmit   */
28 /* or receive has been completed, the VNIC driver is required to use      */
29 /* "long term mapping". This entails that large, continuous DMA mapped    */
30 /* buffers are allocated on driver initialization and these buffers are   */
31 /* then continuously reused to pass skbs to and from the VNIC server.     */
32 /*                                                                        */
33 /**************************************************************************/
34
35 #include <linux/module.h>
36 #include <linux/moduleparam.h>
37 #include <linux/types.h>
38 #include <linux/errno.h>
39 #include <linux/completion.h>
40 #include <linux/ioport.h>
41 #include <linux/dma-mapping.h>
42 #include <linux/kernel.h>
43 #include <linux/netdevice.h>
44 #include <linux/etherdevice.h>
45 #include <linux/skbuff.h>
46 #include <linux/init.h>
47 #include <linux/delay.h>
48 #include <linux/mm.h>
49 #include <linux/ethtool.h>
50 #include <linux/proc_fs.h>
51 #include <linux/if_arp.h>
52 #include <linux/in.h>
53 #include <linux/ip.h>
54 #include <linux/ipv6.h>
55 #include <linux/irq.h>
56 #include <linux/irqdomain.h>
57 #include <linux/kthread.h>
58 #include <linux/seq_file.h>
59 #include <linux/interrupt.h>
60 #include <net/net_namespace.h>
61 #include <asm/hvcall.h>
62 #include <linux/atomic.h>
63 #include <asm/vio.h>
64 #include <asm/xive.h>
65 #include <asm/iommu.h>
66 #include <linux/uaccess.h>
67 #include <asm/firmware.h>
68 #include <linux/workqueue.h>
69 #include <linux/if_vlan.h>
70 #include <linux/utsname.h>
71 #include <linux/cpu.h>
72
73 #include "ibmvnic.h"
74
75 static const char ibmvnic_driver_name[] = "ibmvnic";
76 static const char ibmvnic_driver_string[] = "IBM System i/p Virtual NIC Driver";
77
78 MODULE_AUTHOR("Santiago Leon");
79 MODULE_DESCRIPTION("IBM System i/p Virtual NIC Driver");
80 MODULE_LICENSE("GPL");
81 MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
82
83 static int ibmvnic_version = IBMVNIC_INITIAL_VERSION;
84 static void release_sub_crqs(struct ibmvnic_adapter *, bool);
85 static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
86 static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
87 static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *);
88 static int ibmvnic_send_crq(struct ibmvnic_adapter *, union ibmvnic_crq *);
89 static int send_subcrq_indirect(struct ibmvnic_adapter *, u64, u64, u64);
90 static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance);
91 static int enable_scrq_irq(struct ibmvnic_adapter *,
92                            struct ibmvnic_sub_crq_queue *);
93 static int disable_scrq_irq(struct ibmvnic_adapter *,
94                             struct ibmvnic_sub_crq_queue *);
95 static int pending_scrq(struct ibmvnic_adapter *,
96                         struct ibmvnic_sub_crq_queue *);
97 static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *,
98                                         struct ibmvnic_sub_crq_queue *);
99 static int ibmvnic_poll(struct napi_struct *napi, int data);
100 static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter);
101 static inline void reinit_init_done(struct ibmvnic_adapter *adapter);
102 static void send_query_map(struct ibmvnic_adapter *adapter);
103 static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8);
104 static int send_request_unmap(struct ibmvnic_adapter *, u8);
105 static int send_login(struct ibmvnic_adapter *adapter);
106 static void send_query_cap(struct ibmvnic_adapter *adapter);
107 static int init_sub_crqs(struct ibmvnic_adapter *);
108 static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
109 static int ibmvnic_reset_init(struct ibmvnic_adapter *, bool reset);
110 static void release_crq_queue(struct ibmvnic_adapter *);
111 static int __ibmvnic_set_mac(struct net_device *, u8 *);
112 static int init_crq_queue(struct ibmvnic_adapter *adapter);
113 static int send_query_phys_parms(struct ibmvnic_adapter *adapter);
114 static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
115                                          struct ibmvnic_sub_crq_queue *tx_scrq);
116 static void free_long_term_buff(struct ibmvnic_adapter *adapter,
117                                 struct ibmvnic_long_term_buff *ltb);
118 static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);
119 static void flush_reset_queue(struct ibmvnic_adapter *adapter);
120 static void print_subcrq_error(struct device *dev, int rc, const char *func);
121
122 struct ibmvnic_stat {
123         char name[ETH_GSTRING_LEN];
124         int offset;
125 };
126
/* Byte offset of counter @stat measured from the start of
 * struct ibmvnic_adapter: offset of the embedded stats member plus the
 * offset of @stat inside struct ibmvnic_statistics.
 */
#define IBMVNIC_STAT_OFF(stat) \
	(offsetof(struct ibmvnic_adapter, stats) + \
	 offsetof(struct ibmvnic_statistics, stat))

/* Read the u64 stored @off bytes past the object @a points to. */
#define IBMVNIC_GET_STAT(a, off) \
	(*(u64 *)((unsigned long)(a) + (off)))
130
131 static const struct ibmvnic_stat ibmvnic_stats[] = {
132         {"rx_packets", IBMVNIC_STAT_OFF(rx_packets)},
133         {"rx_bytes", IBMVNIC_STAT_OFF(rx_bytes)},
134         {"tx_packets", IBMVNIC_STAT_OFF(tx_packets)},
135         {"tx_bytes", IBMVNIC_STAT_OFF(tx_bytes)},
136         {"ucast_tx_packets", IBMVNIC_STAT_OFF(ucast_tx_packets)},
137         {"ucast_rx_packets", IBMVNIC_STAT_OFF(ucast_rx_packets)},
138         {"mcast_tx_packets", IBMVNIC_STAT_OFF(mcast_tx_packets)},
139         {"mcast_rx_packets", IBMVNIC_STAT_OFF(mcast_rx_packets)},
140         {"bcast_tx_packets", IBMVNIC_STAT_OFF(bcast_tx_packets)},
141         {"bcast_rx_packets", IBMVNIC_STAT_OFF(bcast_rx_packets)},
142         {"align_errors", IBMVNIC_STAT_OFF(align_errors)},
143         {"fcs_errors", IBMVNIC_STAT_OFF(fcs_errors)},
144         {"single_collision_frames", IBMVNIC_STAT_OFF(single_collision_frames)},
145         {"multi_collision_frames", IBMVNIC_STAT_OFF(multi_collision_frames)},
146         {"sqe_test_errors", IBMVNIC_STAT_OFF(sqe_test_errors)},
147         {"deferred_tx", IBMVNIC_STAT_OFF(deferred_tx)},
148         {"late_collisions", IBMVNIC_STAT_OFF(late_collisions)},
149         {"excess_collisions", IBMVNIC_STAT_OFF(excess_collisions)},
150         {"internal_mac_tx_errors", IBMVNIC_STAT_OFF(internal_mac_tx_errors)},
151         {"carrier_sense", IBMVNIC_STAT_OFF(carrier_sense)},
152         {"too_long_frames", IBMVNIC_STAT_OFF(too_long_frames)},
153         {"internal_mac_rx_errors", IBMVNIC_STAT_OFF(internal_mac_rx_errors)},
154 };
155
156 static int send_crq_init_complete(struct ibmvnic_adapter *adapter)
157 {
158         union ibmvnic_crq crq;
159
160         memset(&crq, 0, sizeof(crq));
161         crq.generic.first = IBMVNIC_CRQ_INIT_CMD;
162         crq.generic.cmd = IBMVNIC_CRQ_INIT_COMPLETE;
163
164         return ibmvnic_send_crq(adapter, &crq);
165 }
166
167 static int send_version_xchg(struct ibmvnic_adapter *adapter)
168 {
169         union ibmvnic_crq crq;
170
171         memset(&crq, 0, sizeof(crq));
172         crq.version_exchange.first = IBMVNIC_CRQ_CMD;
173         crq.version_exchange.cmd = VERSION_EXCHANGE;
174         crq.version_exchange.version = cpu_to_be16(ibmvnic_version);
175
176         return ibmvnic_send_crq(adapter, &crq);
177 }
178
179 static void ibmvnic_clean_queue_affinity(struct ibmvnic_adapter *adapter,
180                                          struct ibmvnic_sub_crq_queue *queue)
181 {
182         if (!(queue && queue->irq))
183                 return;
184
185         cpumask_clear(queue->affinity_mask);
186
187         if (irq_set_affinity_and_hint(queue->irq, NULL))
188                 netdev_warn(adapter->netdev,
189                             "%s: Clear affinity failed, queue addr = %p, IRQ = %d\n",
190                             __func__, queue, queue->irq);
191 }
192
193 static void ibmvnic_clean_affinity(struct ibmvnic_adapter *adapter)
194 {
195         struct ibmvnic_sub_crq_queue **rxqs;
196         struct ibmvnic_sub_crq_queue **txqs;
197         int num_rxqs, num_txqs;
198         int i;
199
200         rxqs = adapter->rx_scrq;
201         txqs = adapter->tx_scrq;
202         num_txqs = adapter->num_active_tx_scrqs;
203         num_rxqs = adapter->num_active_rx_scrqs;
204
205         netdev_dbg(adapter->netdev, "%s: Cleaning irq affinity hints", __func__);
206         if (txqs) {
207                 for (i = 0; i < num_txqs; i++)
208                         ibmvnic_clean_queue_affinity(adapter, txqs[i]);
209         }
210         if (rxqs) {
211                 for (i = 0; i < num_rxqs; i++)
212                         ibmvnic_clean_queue_affinity(adapter, rxqs[i]);
213         }
214 }
215
216 static int ibmvnic_set_queue_affinity(struct ibmvnic_sub_crq_queue *queue,
217                                       unsigned int *cpu, int *stragglers,
218                                       int stride)
219 {
220         cpumask_var_t mask;
221         int i;
222         int rc = 0;
223
224         if (!(queue && queue->irq))
225                 return rc;
226
227         /* cpumask_var_t is either a pointer or array, allocation works here */
228         if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
229                 return -ENOMEM;
230
231         /* while we have extra cpu give one extra to this irq */
232         if (*stragglers) {
233                 stride++;
234                 (*stragglers)--;
235         }
236         /* atomic write is safer than writing bit by bit directly */
237         for (i = 0; i < stride; i++) {
238                 cpumask_set_cpu(*cpu, mask);
239                 *cpu = cpumask_next_wrap(*cpu, cpu_online_mask,
240                                          nr_cpu_ids, false);
241         }
242         /* set queue affinity mask */
243         cpumask_copy(queue->affinity_mask, mask);
244         rc = irq_set_affinity_and_hint(queue->irq, queue->affinity_mask);
245         free_cpumask_var(mask);
246
247         return rc;
248 }
249
250 /* assumes cpu read lock is held */
251 static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
252 {
253         struct ibmvnic_sub_crq_queue **rxqs = adapter->rx_scrq;
254         struct ibmvnic_sub_crq_queue **txqs = adapter->tx_scrq;
255         struct ibmvnic_sub_crq_queue *queue;
256         int num_rxqs = adapter->num_active_rx_scrqs, i_rxqs = 0;
257         int num_txqs = adapter->num_active_tx_scrqs, i_txqs = 0;
258         int total_queues, stride, stragglers, i;
259         unsigned int num_cpu, cpu;
260         bool is_rx_queue;
261         int rc = 0;
262
263         netdev_dbg(adapter->netdev, "%s: Setting irq affinity hints", __func__);
264         if (!(adapter->rx_scrq && adapter->tx_scrq)) {
265                 netdev_warn(adapter->netdev,
266                             "%s: Set affinity failed, queues not allocated\n",
267                             __func__);
268                 return;
269         }
270
271         total_queues = num_rxqs + num_txqs;
272         num_cpu = num_online_cpus();
273         /* number of cpu's assigned per irq */
274         stride = max_t(int, num_cpu / total_queues, 1);
275         /* number of leftover cpu's */
276         stragglers = num_cpu >= total_queues ? num_cpu % total_queues : 0;
277         /* next available cpu to assign irq to */
278         cpu = cpumask_next(-1, cpu_online_mask);
279
280         for (i = 0; i < total_queues; i++) {
281                 is_rx_queue = false;
282                 /* balance core load by alternating rx and tx assignments
283                  * ex: TX0 -> RX0 -> TX1 -> RX1 etc.
284                  */
285                 if ((i % 2 == 1 && i_rxqs < num_rxqs) || i_txqs == num_txqs) {
286                         queue = rxqs[i_rxqs++];
287                         is_rx_queue = true;
288                 } else {
289                         queue = txqs[i_txqs++];
290                 }
291
292                 rc = ibmvnic_set_queue_affinity(queue, &cpu, &stragglers,
293                                                 stride);
294                 if (rc)
295                         goto out;
296
297                 if (!queue || is_rx_queue)
298                         continue;
299
300                 rc = __netif_set_xps_queue(adapter->netdev,
301                                            cpumask_bits(queue->affinity_mask),
302                                            i_txqs - 1, XPS_CPUS);
303                 if (rc)
304                         netdev_warn(adapter->netdev, "%s: Set XPS on queue %d failed, rc = %d.\n",
305                                     __func__, i_txqs - 1, rc);
306         }
307
308 out:
309         if (rc) {
310                 netdev_warn(adapter->netdev,
311                             "%s: Set affinity failed, queue addr = %p, IRQ = %d, rc = %d.\n",
312                             __func__, queue, queue->irq, rc);
313                 ibmvnic_clean_affinity(adapter);
314         }
315 }
316
317 static int ibmvnic_cpu_online(unsigned int cpu, struct hlist_node *node)
318 {
319         struct ibmvnic_adapter *adapter;
320
321         adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node);
322         ibmvnic_set_affinity(adapter);
323         return 0;
324 }
325
326 static int ibmvnic_cpu_dead(unsigned int cpu, struct hlist_node *node)
327 {
328         struct ibmvnic_adapter *adapter;
329
330         adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node_dead);
331         ibmvnic_set_affinity(adapter);
332         return 0;
333 }
334
335 static int ibmvnic_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
336 {
337         struct ibmvnic_adapter *adapter;
338
339         adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node);
340         ibmvnic_clean_affinity(adapter);
341         return 0;
342 }
343
344 static enum cpuhp_state ibmvnic_online;
345
346 static int ibmvnic_cpu_notif_add(struct ibmvnic_adapter *adapter)
347 {
348         int ret;
349
350         ret = cpuhp_state_add_instance_nocalls(ibmvnic_online, &adapter->node);
351         if (ret)
352                 return ret;
353         ret = cpuhp_state_add_instance_nocalls(CPUHP_IBMVNIC_DEAD,
354                                                &adapter->node_dead);
355         if (!ret)
356                 return ret;
357         cpuhp_state_remove_instance_nocalls(ibmvnic_online, &adapter->node);
358         return ret;
359 }
360
361 static void ibmvnic_cpu_notif_remove(struct ibmvnic_adapter *adapter)
362 {
363         cpuhp_state_remove_instance_nocalls(ibmvnic_online, &adapter->node);
364         cpuhp_state_remove_instance_nocalls(CPUHP_IBMVNIC_DEAD,
365                                             &adapter->node_dead);
366 }
367
368 static long h_reg_sub_crq(unsigned long unit_address, unsigned long token,
369                           unsigned long length, unsigned long *number,
370                           unsigned long *irq)
371 {
372         unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
373         long rc;
374
375         rc = plpar_hcall(H_REG_SUB_CRQ, retbuf, unit_address, token, length);
376         *number = retbuf[0];
377         *irq = retbuf[1];
378
379         return rc;
380 }
381
382 /**
383  * ibmvnic_wait_for_completion - Check device state and wait for completion
384  * @adapter: private device data
385  * @comp_done: completion structure to wait for
386  * @timeout: time to wait in milliseconds
387  *
388  * Wait for a completion signal or until the timeout limit is reached
389  * while checking that the device is still active.
390  */
391 static int ibmvnic_wait_for_completion(struct ibmvnic_adapter *adapter,
392                                        struct completion *comp_done,
393                                        unsigned long timeout)
394 {
395         struct net_device *netdev;
396         unsigned long div_timeout;
397         u8 retry;
398
399         netdev = adapter->netdev;
400         retry = 5;
401         div_timeout = msecs_to_jiffies(timeout / retry);
402         while (true) {
403                 if (!adapter->crq.active) {
404                         netdev_err(netdev, "Device down!\n");
405                         return -ENODEV;
406                 }
407                 if (!retry--)
408                         break;
409                 if (wait_for_completion_timeout(comp_done, div_timeout))
410                         return 0;
411         }
412         netdev_err(netdev, "Operation timed out.\n");
413         return -ETIMEDOUT;
414 }
415
416 /**
417  * reuse_ltb() - Check if a long term buffer can be reused
418  * @ltb:  The long term buffer to be checked
419  * @size: The size of the long term buffer.
420  *
421  * An LTB can be reused unless its size has changed.
422  *
423  * Return: Return true if the LTB can be reused, false otherwise.
424  */
425 static bool reuse_ltb(struct ibmvnic_long_term_buff *ltb, int size)
426 {
427         return (ltb->buff && ltb->size == size);
428 }
429
430 /**
431  * alloc_long_term_buff() - Allocate a long term buffer (LTB)
432  *
433  * @adapter: ibmvnic adapter associated to the LTB
434  * @ltb:     container object for the LTB
435  * @size:    size of the LTB
436  *
437  * Allocate an LTB of the specified size and notify VIOS.
438  *
439  * If the given @ltb already has the correct size, reuse it. Otherwise if
440  * its non-NULL, free it. Then allocate a new one of the correct size.
441  * Notify the VIOS either way since we may now be working with a new VIOS.
442  *
443  * Allocating larger chunks of memory during resets, specially LPM or under
444  * low memory situations can cause resets to fail/timeout and for LPAR to
445  * lose connectivity. So hold onto the LTB even if we fail to communicate
446  * with the VIOS and reuse it on next open. Free LTB when adapter is closed.
447  *
448  * Return: 0 if we were able to allocate the LTB and notify the VIOS and
449  *         a negative value otherwise.
450  */
451 static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
452                                 struct ibmvnic_long_term_buff *ltb, int size)
453 {
454         struct device *dev = &adapter->vdev->dev;
455         u64 prev = 0;
456         int rc;
457
458         if (!reuse_ltb(ltb, size)) {
459                 dev_dbg(dev,
460                         "LTB size changed from 0x%llx to 0x%x, reallocating\n",
461                          ltb->size, size);
462                 prev = ltb->size;
463                 free_long_term_buff(adapter, ltb);
464         }
465
466         if (ltb->buff) {
467                 dev_dbg(dev, "Reusing LTB [map %d, size 0x%llx]\n",
468                         ltb->map_id, ltb->size);
469         } else {
470                 ltb->buff = dma_alloc_coherent(dev, size, &ltb->addr,
471                                                GFP_KERNEL);
472                 if (!ltb->buff) {
473                         dev_err(dev, "Couldn't alloc long term buffer\n");
474                         return -ENOMEM;
475                 }
476                 ltb->size = size;
477
478                 ltb->map_id = find_first_zero_bit(adapter->map_ids,
479                                                   MAX_MAP_ID);
480                 bitmap_set(adapter->map_ids, ltb->map_id, 1);
481
482                 dev_dbg(dev,
483                         "Allocated new LTB [map %d, size 0x%llx was 0x%llx]\n",
484                          ltb->map_id, ltb->size, prev);
485         }
486
487         /* Ensure ltb is zeroed - specially when reusing it. */
488         memset(ltb->buff, 0, ltb->size);
489
490         mutex_lock(&adapter->fw_lock);
491         adapter->fw_done_rc = 0;
492         reinit_completion(&adapter->fw_done);
493
494         rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
495         if (rc) {
496                 dev_err(dev, "send_request_map failed, rc = %d\n", rc);
497                 goto out;
498         }
499
500         rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
501         if (rc) {
502                 dev_err(dev, "LTB map request aborted or timed out, rc = %d\n",
503                         rc);
504                 goto out;
505         }
506
507         if (adapter->fw_done_rc) {
508                 dev_err(dev, "Couldn't map LTB, rc = %d\n",
509                         adapter->fw_done_rc);
510                 rc = -EIO;
511                 goto out;
512         }
513         rc = 0;
514 out:
515         /* don't free LTB on communication error - see function header */
516         mutex_unlock(&adapter->fw_lock);
517         return rc;
518 }
519
520 static void free_long_term_buff(struct ibmvnic_adapter *adapter,
521                                 struct ibmvnic_long_term_buff *ltb)
522 {
523         struct device *dev = &adapter->vdev->dev;
524
525         if (!ltb->buff)
526                 return;
527
528         /* VIOS automatically unmaps the long term buffer at remote
529          * end for the following resets:
530          * FAILOVER, MOBILITY, TIMEOUT.
531          */
532         if (adapter->reset_reason != VNIC_RESET_FAILOVER &&
533             adapter->reset_reason != VNIC_RESET_MOBILITY &&
534             adapter->reset_reason != VNIC_RESET_TIMEOUT)
535                 send_request_unmap(adapter, ltb->map_id);
536
537         dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
538
539         ltb->buff = NULL;
540         /* mark this map_id free */
541         bitmap_clear(adapter->map_ids, ltb->map_id, 1);
542         ltb->map_id = 0;
543 }
544
545 /**
546  * free_ltb_set - free the given set of long term buffers (LTBS)
547  * @adapter: The ibmvnic adapter containing this ltb set
548  * @ltb_set: The ltb_set to be freed
549  *
550  * Free the set of LTBs in the given set.
551  */
552
553 static void free_ltb_set(struct ibmvnic_adapter *adapter,
554                          struct ibmvnic_ltb_set *ltb_set)
555 {
556         int i;
557
558         for (i = 0; i < ltb_set->num_ltbs; i++)
559                 free_long_term_buff(adapter, &ltb_set->ltbs[i]);
560
561         kfree(ltb_set->ltbs);
562         ltb_set->ltbs = NULL;
563         ltb_set->num_ltbs = 0;
564 }
565
566 /**
567  * alloc_ltb_set() - Allocate a set of long term buffers (LTBs)
568  *
569  * @adapter: ibmvnic adapter associated to the LTB
570  * @ltb_set: container object for the set of LTBs
571  * @num_buffs: Number of buffers in the LTB
572  * @buff_size: Size of each buffer in the LTB
573  *
574  * Allocate a set of LTBs to accommodate @num_buffs buffers of @buff_size
575  * each. We currently cap size each LTB to IBMVNIC_ONE_LTB_SIZE. If the
576  * new set of LTBs have fewer LTBs than the old set, free the excess LTBs.
577  * If new set needs more than in old set, allocate the remaining ones.
578  * Try and reuse as many LTBs as possible and avoid reallocation.
579  *
580  * Any changes to this allocation strategy must be reflected in
581  * map_rxpool_buff_to_ltb() and map_txpool_buff_to_ltb().
582  */
583 static int alloc_ltb_set(struct ibmvnic_adapter *adapter,
584                          struct ibmvnic_ltb_set *ltb_set, int num_buffs,
585                          int buff_size)
586 {
587         struct device *dev = &adapter->vdev->dev;
588         struct ibmvnic_ltb_set old_set;
589         struct ibmvnic_ltb_set new_set;
590         int rem_size;
591         int tot_size;           /* size of all ltbs */
592         int ltb_size;           /* size of one ltb */
593         int nltbs;
594         int rc;
595         int n;
596         int i;
597
598         dev_dbg(dev, "%s() num_buffs %d, buff_size %d\n", __func__, num_buffs,
599                 buff_size);
600
601         ltb_size = rounddown(IBMVNIC_ONE_LTB_SIZE, buff_size);
602         tot_size = num_buffs * buff_size;
603
604         if (ltb_size > tot_size)
605                 ltb_size = tot_size;
606
607         nltbs = tot_size / ltb_size;
608         if (tot_size % ltb_size)
609                 nltbs++;
610
611         old_set = *ltb_set;
612
613         if (old_set.num_ltbs == nltbs) {
614                 new_set = old_set;
615         } else {
616                 int tmp = nltbs * sizeof(struct ibmvnic_long_term_buff);
617
618                 new_set.ltbs = kzalloc(tmp, GFP_KERNEL);
619                 if (!new_set.ltbs)
620                         return -ENOMEM;
621
622                 new_set.num_ltbs = nltbs;
623
624                 /* Free any excess ltbs in old set */
625                 for (i = new_set.num_ltbs; i < old_set.num_ltbs; i++)
626                         free_long_term_buff(adapter, &old_set.ltbs[i]);
627
628                 /* Copy remaining ltbs to new set. All LTBs except the
629                  * last one are of the same size. alloc_long_term_buff()
630                  * will realloc if the size changes.
631                  */
632                 n = min(old_set.num_ltbs, new_set.num_ltbs);
633                 for (i = 0; i < n; i++)
634                         new_set.ltbs[i] = old_set.ltbs[i];
635
636                 /* Any additional ltbs in new set will have NULL ltbs for
637                  * now and will be allocated in alloc_long_term_buff().
638                  */
639
640                 /* We no longer need the old_set so free it. Note that we
641                  * may have reused some ltbs from old set and freed excess
642                  * ltbs above. So we only need to free the container now
643                  * not the LTBs themselves. (i.e. dont free_ltb_set()!)
644                  */
645                 kfree(old_set.ltbs);
646                 old_set.ltbs = NULL;
647                 old_set.num_ltbs = 0;
648
649                 /* Install the new set. If allocations fail below, we will
650                  * retry later and know what size LTBs we need.
651                  */
652                 *ltb_set = new_set;
653         }
654
655         i = 0;
656         rem_size = tot_size;
657         while (rem_size) {
658                 if (ltb_size > rem_size)
659                         ltb_size = rem_size;
660
661                 rem_size -= ltb_size;
662
663                 rc = alloc_long_term_buff(adapter, &new_set.ltbs[i], ltb_size);
664                 if (rc)
665                         goto out;
666                 i++;
667         }
668
669         WARN_ON(i != new_set.num_ltbs);
670
671         return 0;
672 out:
673         /* We may have allocated one/more LTBs before failing and we
674          * want to try and reuse on next reset. So don't free ltb set.
675          */
676         return rc;
677 }
678
679 /**
680  * map_rxpool_buf_to_ltb - Map given rxpool buffer to offset in an LTB.
681  * @rxpool: The receive buffer pool containing buffer
682  * @bufidx: Index of buffer in rxpool
683  * @ltbp: (Output) pointer to the long term buffer containing the buffer
684  * @offset: (Output) offset of buffer in the LTB from @ltbp
685  *
686  * Map the given buffer identified by [rxpool, bufidx] to an LTB in the
687  * pool and its corresponding offset. Assume for now that each LTB is of
688  * different size but could possibly be optimized based on the allocation
689  * strategy in alloc_ltb_set().
690  */
691 static void map_rxpool_buf_to_ltb(struct ibmvnic_rx_pool *rxpool,
692                                   unsigned int bufidx,
693                                   struct ibmvnic_long_term_buff **ltbp,
694                                   unsigned int *offset)
695 {
696         struct ibmvnic_long_term_buff *ltb;
697         int nbufs;      /* # of buffers in one ltb */
698         int i;
699
700         WARN_ON(bufidx >= rxpool->size);
701
702         for (i = 0; i < rxpool->ltb_set.num_ltbs; i++) {
703                 ltb = &rxpool->ltb_set.ltbs[i];
704                 nbufs = ltb->size / rxpool->buff_size;
705                 if (bufidx < nbufs)
706                         break;
707                 bufidx -= nbufs;
708         }
709
710         *ltbp = ltb;
711         *offset = bufidx * rxpool->buff_size;
712 }
713
714 /**
715  * map_txpool_buf_to_ltb - Map given txpool buffer to offset in an LTB.
716  * @txpool: The transmit buffer pool containing buffer
717  * @bufidx: Index of buffer in txpool
718  * @ltbp: (Output) pointer to the long term buffer (LTB) containing the buffer
719  * @offset: (Output) offset of buffer in the LTB from @ltbp
720  *
721  * Map the given buffer identified by [txpool, bufidx] to an LTB in the
722  * pool and its corresponding offset.
723  */
724 static void map_txpool_buf_to_ltb(struct ibmvnic_tx_pool *txpool,
725                                   unsigned int bufidx,
726                                   struct ibmvnic_long_term_buff **ltbp,
727                                   unsigned int *offset)
728 {
729         struct ibmvnic_long_term_buff *ltb;
730         int nbufs;      /* # of buffers in one ltb */
731         int i;
732
733         WARN_ON_ONCE(bufidx >= txpool->num_buffers);
734
735         for (i = 0; i < txpool->ltb_set.num_ltbs; i++) {
736                 ltb = &txpool->ltb_set.ltbs[i];
737                 nbufs = ltb->size / txpool->buf_size;
738                 if (bufidx < nbufs)
739                         break;
740                 bufidx -= nbufs;
741         }
742
743         *ltbp = ltb;
744         *offset = bufidx * txpool->buf_size;
745 }
746
747 static void deactivate_rx_pools(struct ibmvnic_adapter *adapter)
748 {
749         int i;
750
751         for (i = 0; i < adapter->num_active_rx_pools; i++)
752                 adapter->rx_pool[i].active = 0;
753 }
754
755 static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
756                               struct ibmvnic_rx_pool *pool)
757 {
758         int count = pool->size - atomic_read(&pool->available);
759         u64 handle = adapter->rx_scrq[pool->index]->handle;
760         struct device *dev = &adapter->vdev->dev;
761         struct ibmvnic_ind_xmit_queue *ind_bufp;
762         struct ibmvnic_sub_crq_queue *rx_scrq;
763         struct ibmvnic_long_term_buff *ltb;
764         union sub_crq *sub_crq;
765         int buffers_added = 0;
766         unsigned long lpar_rc;
767         struct sk_buff *skb;
768         unsigned int offset;
769         dma_addr_t dma_addr;
770         unsigned char *dst;
771         int shift = 0;
772         int bufidx;
773         int i;
774
775         if (!pool->active)
776                 return;
777
778         rx_scrq = adapter->rx_scrq[pool->index];
779         ind_bufp = &rx_scrq->ind_buf;
780
781         /* netdev_skb_alloc() could have failed after we saved a few skbs
782          * in the indir_buf and we would not have sent them to VIOS yet.
783          * To account for them, start the loop at ind_bufp->index rather
784          * than 0. If we pushed all the skbs to VIOS, ind_bufp->index will
785          * be 0.
786          */
787         for (i = ind_bufp->index; i < count; ++i) {
788                 bufidx = pool->free_map[pool->next_free];
789
790                 /* We maybe reusing the skb from earlier resets. Allocate
791                  * only if necessary. But since the LTB may have changed
792                  * during reset (see init_rx_pools()), update LTB below
793                  * even if reusing skb.
794                  */
795                 skb = pool->rx_buff[bufidx].skb;
796                 if (!skb) {
797                         skb = netdev_alloc_skb(adapter->netdev,
798                                                pool->buff_size);
799                         if (!skb) {
800                                 dev_err(dev, "Couldn't replenish rx buff\n");
801                                 adapter->replenish_no_mem++;
802                                 break;
803                         }
804                 }
805
806                 pool->free_map[pool->next_free] = IBMVNIC_INVALID_MAP;
807                 pool->next_free = (pool->next_free + 1) % pool->size;
808
809                 /* Copy the skb to the long term mapped DMA buffer */
810                 map_rxpool_buf_to_ltb(pool, bufidx, &ltb, &offset);
811                 dst = ltb->buff + offset;
812                 memset(dst, 0, pool->buff_size);
813                 dma_addr = ltb->addr + offset;
814
815                 /* add the skb to an rx_buff in the pool */
816                 pool->rx_buff[bufidx].data = dst;
817                 pool->rx_buff[bufidx].dma = dma_addr;
818                 pool->rx_buff[bufidx].skb = skb;
819                 pool->rx_buff[bufidx].pool_index = pool->index;
820                 pool->rx_buff[bufidx].size = pool->buff_size;
821
822                 /* queue the rx_buff for the next send_subcrq_indirect */
823                 sub_crq = &ind_bufp->indir_arr[ind_bufp->index++];
824                 memset(sub_crq, 0, sizeof(*sub_crq));
825                 sub_crq->rx_add.first = IBMVNIC_CRQ_CMD;
826                 sub_crq->rx_add.correlator =
827                     cpu_to_be64((u64)&pool->rx_buff[bufidx]);
828                 sub_crq->rx_add.ioba = cpu_to_be32(dma_addr);
829                 sub_crq->rx_add.map_id = ltb->map_id;
830
831                 /* The length field of the sCRQ is defined to be 24 bits so the
832                  * buffer size needs to be left shifted by a byte before it is
833                  * converted to big endian to prevent the last byte from being
834                  * truncated.
835                  */
836 #ifdef __LITTLE_ENDIAN__
837                 shift = 8;
838 #endif
839                 sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift);
840
841                 /* if send_subcrq_indirect queue is full, flush to VIOS */
842                 if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS ||
843                     i == count - 1) {
844                         lpar_rc =
845                                 send_subcrq_indirect(adapter, handle,
846                                                      (u64)ind_bufp->indir_dma,
847                                                      (u64)ind_bufp->index);
848                         if (lpar_rc != H_SUCCESS)
849                                 goto failure;
850                         buffers_added += ind_bufp->index;
851                         adapter->replenish_add_buff_success += ind_bufp->index;
852                         ind_bufp->index = 0;
853                 }
854         }
855         atomic_add(buffers_added, &pool->available);
856         return;
857
858 failure:
859         if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED)
860                 dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n");
861         for (i = ind_bufp->index - 1; i >= 0; --i) {
862                 struct ibmvnic_rx_buff *rx_buff;
863
864                 pool->next_free = pool->next_free == 0 ?
865                                   pool->size - 1 : pool->next_free - 1;
866                 sub_crq = &ind_bufp->indir_arr[i];
867                 rx_buff = (struct ibmvnic_rx_buff *)
868                                 be64_to_cpu(sub_crq->rx_add.correlator);
869                 bufidx = (int)(rx_buff - pool->rx_buff);
870                 pool->free_map[pool->next_free] = bufidx;
871                 dev_kfree_skb_any(pool->rx_buff[bufidx].skb);
872                 pool->rx_buff[bufidx].skb = NULL;
873         }
874         adapter->replenish_add_buff_failure += ind_bufp->index;
875         atomic_add(buffers_added, &pool->available);
876         ind_bufp->index = 0;
877         if (lpar_rc == H_CLOSED || adapter->failover_pending) {
878                 /* Disable buffer pool replenishment and report carrier off if
879                  * queue is closed or pending failover.
880                  * Firmware guarantees that a signal will be sent to the
881                  * driver, triggering a reset.
882                  */
883                 deactivate_rx_pools(adapter);
884                 netif_carrier_off(adapter->netdev);
885         }
886 }
887
888 static void replenish_pools(struct ibmvnic_adapter *adapter)
889 {
890         int i;
891
892         adapter->replenish_task_cycles++;
893         for (i = 0; i < adapter->num_active_rx_pools; i++) {
894                 if (adapter->rx_pool[i].active)
895                         replenish_rx_pool(adapter, &adapter->rx_pool[i]);
896         }
897
898         netdev_dbg(adapter->netdev, "Replenished %d pools\n", i);
899 }
900
901 static void release_stats_buffers(struct ibmvnic_adapter *adapter)
902 {
903         kfree(adapter->tx_stats_buffers);
904         kfree(adapter->rx_stats_buffers);
905         adapter->tx_stats_buffers = NULL;
906         adapter->rx_stats_buffers = NULL;
907 }
908
909 static int init_stats_buffers(struct ibmvnic_adapter *adapter)
910 {
911         adapter->tx_stats_buffers =
912                                 kcalloc(IBMVNIC_MAX_QUEUES,
913                                         sizeof(struct ibmvnic_tx_queue_stats),
914                                         GFP_KERNEL);
915         if (!adapter->tx_stats_buffers)
916                 return -ENOMEM;
917
918         adapter->rx_stats_buffers =
919                                 kcalloc(IBMVNIC_MAX_QUEUES,
920                                         sizeof(struct ibmvnic_rx_queue_stats),
921                                         GFP_KERNEL);
922         if (!adapter->rx_stats_buffers)
923                 return -ENOMEM;
924
925         return 0;
926 }
927
928 static void release_stats_token(struct ibmvnic_adapter *adapter)
929 {
930         struct device *dev = &adapter->vdev->dev;
931
932         if (!adapter->stats_token)
933                 return;
934
935         dma_unmap_single(dev, adapter->stats_token,
936                          sizeof(struct ibmvnic_statistics),
937                          DMA_FROM_DEVICE);
938         adapter->stats_token = 0;
939 }
940
941 static int init_stats_token(struct ibmvnic_adapter *adapter)
942 {
943         struct device *dev = &adapter->vdev->dev;
944         dma_addr_t stok;
945         int rc;
946
947         stok = dma_map_single(dev, &adapter->stats,
948                               sizeof(struct ibmvnic_statistics),
949                               DMA_FROM_DEVICE);
950         rc = dma_mapping_error(dev, stok);
951         if (rc) {
952                 dev_err(dev, "Couldn't map stats buffer, rc = %d\n", rc);
953                 return rc;
954         }
955
956         adapter->stats_token = stok;
957         netdev_dbg(adapter->netdev, "Stats token initialized (%llx)\n", stok);
958         return 0;
959 }
960
961 /**
962  * release_rx_pools() - Release any rx pools attached to @adapter.
963  * @adapter: ibmvnic adapter
964  *
965  * Safe to call this multiple times - even if no pools are attached.
966  */
967 static void release_rx_pools(struct ibmvnic_adapter *adapter)
968 {
969         struct ibmvnic_rx_pool *rx_pool;
970         int i, j;
971
972         if (!adapter->rx_pool)
973                 return;
974
975         for (i = 0; i < adapter->num_active_rx_pools; i++) {
976                 rx_pool = &adapter->rx_pool[i];
977
978                 netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i);
979
980                 kfree(rx_pool->free_map);
981
982                 free_ltb_set(adapter, &rx_pool->ltb_set);
983
984                 if (!rx_pool->rx_buff)
985                         continue;
986
987                 for (j = 0; j < rx_pool->size; j++) {
988                         if (rx_pool->rx_buff[j].skb) {
989                                 dev_kfree_skb_any(rx_pool->rx_buff[j].skb);
990                                 rx_pool->rx_buff[j].skb = NULL;
991                         }
992                 }
993
994                 kfree(rx_pool->rx_buff);
995         }
996
997         kfree(adapter->rx_pool);
998         adapter->rx_pool = NULL;
999         adapter->num_active_rx_pools = 0;
1000         adapter->prev_rx_pool_size = 0;
1001 }
1002
1003 /**
1004  * reuse_rx_pools() - Check if the existing rx pools can be reused.
1005  * @adapter: ibmvnic adapter
1006  *
1007  * Check if the existing rx pools in the adapter can be reused. The
1008  * pools can be reused if the pool parameters (number of pools,
1009  * number of buffers in the pool and size of each buffer) have not
1010  * changed.
1011  *
1012  * NOTE: This assumes that all pools have the same number of buffers
1013  *       which is the case currently. If that changes, we must fix this.
1014  *
1015  * Return: true if the rx pools can be reused, false otherwise.
1016  */
1017 static bool reuse_rx_pools(struct ibmvnic_adapter *adapter)
1018 {
1019         u64 old_num_pools, new_num_pools;
1020         u64 old_pool_size, new_pool_size;
1021         u64 old_buff_size, new_buff_size;
1022
1023         if (!adapter->rx_pool)
1024                 return false;
1025
1026         old_num_pools = adapter->num_active_rx_pools;
1027         new_num_pools = adapter->req_rx_queues;
1028
1029         old_pool_size = adapter->prev_rx_pool_size;
1030         new_pool_size = adapter->req_rx_add_entries_per_subcrq;
1031
1032         old_buff_size = adapter->prev_rx_buf_sz;
1033         new_buff_size = adapter->cur_rx_buf_sz;
1034
1035         if (old_buff_size != new_buff_size ||
1036             old_num_pools != new_num_pools ||
1037             old_pool_size != new_pool_size)
1038                 return false;
1039
1040         return true;
1041 }
1042
1043 /**
1044  * init_rx_pools(): Initialize the set of receiver pools in the adapter.
1045  * @netdev: net device associated with the vnic interface
1046  *
1047  * Initialize the set of receiver pools in the ibmvnic adapter associated
1048  * with the net_device @netdev. If possible, reuse the existing rx pools.
1049  * Otherwise free any existing pools and  allocate a new set of pools
1050  * before initializing them.
1051  *
1052  * Return: 0 on success and negative value on error.
1053  */
1054 static int init_rx_pools(struct net_device *netdev)
1055 {
1056         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
1057         struct device *dev = &adapter->vdev->dev;
1058         struct ibmvnic_rx_pool *rx_pool;
1059         u64 num_pools;
1060         u64 pool_size;          /* # of buffers in one pool */
1061         u64 buff_size;
1062         int i, j, rc;
1063
1064         pool_size = adapter->req_rx_add_entries_per_subcrq;
1065         num_pools = adapter->req_rx_queues;
1066         buff_size = adapter->cur_rx_buf_sz;
1067
1068         if (reuse_rx_pools(adapter)) {
1069                 dev_dbg(dev, "Reusing rx pools\n");
1070                 goto update_ltb;
1071         }
1072
1073         /* Allocate/populate the pools. */
1074         release_rx_pools(adapter);
1075
1076         adapter->rx_pool = kcalloc(num_pools,
1077                                    sizeof(struct ibmvnic_rx_pool),
1078                                    GFP_KERNEL);
1079         if (!adapter->rx_pool) {
1080                 dev_err(dev, "Failed to allocate rx pools\n");
1081                 return -ENOMEM;
1082         }
1083
1084         /* Set num_active_rx_pools early. If we fail below after partial
1085          * allocation, release_rx_pools() will know how many to look for.
1086          */
1087         adapter->num_active_rx_pools = num_pools;
1088
1089         for (i = 0; i < num_pools; i++) {
1090                 rx_pool = &adapter->rx_pool[i];
1091
1092                 netdev_dbg(adapter->netdev,
1093                            "Initializing rx_pool[%d], %lld buffs, %lld bytes each\n",
1094                            i, pool_size, buff_size);
1095
1096                 rx_pool->size = pool_size;
1097                 rx_pool->index = i;
1098                 rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
1099
1100                 rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int),
1101                                             GFP_KERNEL);
1102                 if (!rx_pool->free_map) {
1103                         dev_err(dev, "Couldn't alloc free_map %d\n", i);
1104                         rc = -ENOMEM;
1105                         goto out_release;
1106                 }
1107
1108                 rx_pool->rx_buff = kcalloc(rx_pool->size,
1109                                            sizeof(struct ibmvnic_rx_buff),
1110                                            GFP_KERNEL);
1111                 if (!rx_pool->rx_buff) {
1112                         dev_err(dev, "Couldn't alloc rx buffers\n");
1113                         rc = -ENOMEM;
1114                         goto out_release;
1115                 }
1116         }
1117
1118         adapter->prev_rx_pool_size = pool_size;
1119         adapter->prev_rx_buf_sz = adapter->cur_rx_buf_sz;
1120
1121 update_ltb:
1122         for (i = 0; i < num_pools; i++) {
1123                 rx_pool = &adapter->rx_pool[i];
1124                 dev_dbg(dev, "Updating LTB for rx pool %d [%d, %d]\n",
1125                         i, rx_pool->size, rx_pool->buff_size);
1126
1127                 rc = alloc_ltb_set(adapter, &rx_pool->ltb_set,
1128                                    rx_pool->size, rx_pool->buff_size);
1129                 if (rc)
1130                         goto out;
1131
1132                 for (j = 0; j < rx_pool->size; ++j) {
1133                         struct ibmvnic_rx_buff *rx_buff;
1134
1135                         rx_pool->free_map[j] = j;
1136
1137                         /* NOTE: Don't clear rx_buff->skb here - will leak
1138                          * memory! replenish_rx_pool() will reuse skbs or
1139                          * allocate as necessary.
1140                          */
1141                         rx_buff = &rx_pool->rx_buff[j];
1142                         rx_buff->dma = 0;
1143                         rx_buff->data = 0;
1144                         rx_buff->size = 0;
1145                         rx_buff->pool_index = 0;
1146                 }
1147
1148                 /* Mark pool "empty" so replenish_rx_pools() will
1149                  * update the LTB info for each buffer
1150                  */
1151                 atomic_set(&rx_pool->available, 0);
1152                 rx_pool->next_alloc = 0;
1153                 rx_pool->next_free = 0;
1154                 /* replenish_rx_pool() may have called deactivate_rx_pools()
1155                  * on failover. Ensure pool is active now.
1156                  */
1157                 rx_pool->active = 1;
1158         }
1159         return 0;
1160 out_release:
1161         release_rx_pools(adapter);
1162 out:
1163         /* We failed to allocate one or more LTBs or map them on the VIOS.
1164          * Hold onto the pools and any LTBs that we did allocate/map.
1165          */
1166         return rc;
1167 }
1168
1169 static void release_vpd_data(struct ibmvnic_adapter *adapter)
1170 {
1171         if (!adapter->vpd)
1172                 return;
1173
1174         kfree(adapter->vpd->buff);
1175         kfree(adapter->vpd);
1176
1177         adapter->vpd = NULL;
1178 }
1179
1180 static void release_one_tx_pool(struct ibmvnic_adapter *adapter,
1181                                 struct ibmvnic_tx_pool *tx_pool)
1182 {
1183         kfree(tx_pool->tx_buff);
1184         kfree(tx_pool->free_map);
1185         free_ltb_set(adapter, &tx_pool->ltb_set);
1186 }
1187
1188 /**
1189  * release_tx_pools() - Release any tx pools attached to @adapter.
1190  * @adapter: ibmvnic adapter
1191  *
1192  * Safe to call this multiple times - even if no pools are attached.
1193  */
1194 static void release_tx_pools(struct ibmvnic_adapter *adapter)
1195 {
1196         int i;
1197
1198         /* init_tx_pools() ensures that ->tx_pool and ->tso_pool are
1199          * both NULL or both non-NULL. So we only need to check one.
1200          */
1201         if (!adapter->tx_pool)
1202                 return;
1203
1204         for (i = 0; i < adapter->num_active_tx_pools; i++) {
1205                 release_one_tx_pool(adapter, &adapter->tx_pool[i]);
1206                 release_one_tx_pool(adapter, &adapter->tso_pool[i]);
1207         }
1208
1209         kfree(adapter->tx_pool);
1210         adapter->tx_pool = NULL;
1211         kfree(adapter->tso_pool);
1212         adapter->tso_pool = NULL;
1213         adapter->num_active_tx_pools = 0;
1214         adapter->prev_tx_pool_size = 0;
1215 }
1216
1217 static int init_one_tx_pool(struct net_device *netdev,
1218                             struct ibmvnic_tx_pool *tx_pool,
1219                             int pool_size, int buf_size)
1220 {
1221         int i;
1222
1223         tx_pool->tx_buff = kcalloc(pool_size,
1224                                    sizeof(struct ibmvnic_tx_buff),
1225                                    GFP_KERNEL);
1226         if (!tx_pool->tx_buff)
1227                 return -ENOMEM;
1228
1229         tx_pool->free_map = kcalloc(pool_size, sizeof(int), GFP_KERNEL);
1230         if (!tx_pool->free_map) {
1231                 kfree(tx_pool->tx_buff);
1232                 tx_pool->tx_buff = NULL;
1233                 return -ENOMEM;
1234         }
1235
1236         for (i = 0; i < pool_size; i++)
1237                 tx_pool->free_map[i] = i;
1238
1239         tx_pool->consumer_index = 0;
1240         tx_pool->producer_index = 0;
1241         tx_pool->num_buffers = pool_size;
1242         tx_pool->buf_size = buf_size;
1243
1244         return 0;
1245 }
1246
1247 /**
1248  * reuse_tx_pools() - Check if the existing tx pools can be reused.
1249  * @adapter: ibmvnic adapter
1250  *
1251  * Check if the existing tx pools in the adapter can be reused. The
1252  * pools can be reused if the pool parameters (number of pools,
1253  * number of buffers in the pool and mtu) have not changed.
1254  *
1255  * NOTE: This assumes that all pools have the same number of buffers
1256  *       which is the case currently. If that changes, we must fix this.
1257  *
1258  * Return: true if the tx pools can be reused, false otherwise.
1259  */
1260 static bool reuse_tx_pools(struct ibmvnic_adapter *adapter)
1261 {
1262         u64 old_num_pools, new_num_pools;
1263         u64 old_pool_size, new_pool_size;
1264         u64 old_mtu, new_mtu;
1265
1266         if (!adapter->tx_pool)
1267                 return false;
1268
1269         old_num_pools = adapter->num_active_tx_pools;
1270         new_num_pools = adapter->num_active_tx_scrqs;
1271         old_pool_size = adapter->prev_tx_pool_size;
1272         new_pool_size = adapter->req_tx_entries_per_subcrq;
1273         old_mtu = adapter->prev_mtu;
1274         new_mtu = adapter->req_mtu;
1275
1276         if (old_mtu != new_mtu ||
1277             old_num_pools != new_num_pools ||
1278             old_pool_size != new_pool_size)
1279                 return false;
1280
1281         return true;
1282 }
1283
1284 /**
1285  * init_tx_pools(): Initialize the set of transmit pools in the adapter.
1286  * @netdev: net device associated with the vnic interface
1287  *
1288  * Initialize the set of transmit pools in the ibmvnic adapter associated
1289  * with the net_device @netdev. If possible, reuse the existing tx pools.
1290  * Otherwise free any existing pools and  allocate a new set of pools
1291  * before initializing them.
1292  *
1293  * Return: 0 on success and negative value on error.
1294  */
1295 static int init_tx_pools(struct net_device *netdev)
1296 {
1297         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
1298         struct device *dev = &adapter->vdev->dev;
1299         int num_pools;
1300         u64 pool_size;          /* # of buffers in pool */
1301         u64 buff_size;
1302         int i, j, rc;
1303
1304         num_pools = adapter->req_tx_queues;
1305
1306         /* We must notify the VIOS about the LTB on all resets - but we only
1307          * need to alloc/populate pools if either the number of buffers or
1308          * size of each buffer in the pool has changed.
1309          */
1310         if (reuse_tx_pools(adapter)) {
1311                 netdev_dbg(netdev, "Reusing tx pools\n");
1312                 goto update_ltb;
1313         }
1314
1315         /* Allocate/populate the pools. */
1316         release_tx_pools(adapter);
1317
1318         pool_size = adapter->req_tx_entries_per_subcrq;
1319         num_pools = adapter->num_active_tx_scrqs;
1320
1321         adapter->tx_pool = kcalloc(num_pools,
1322                                    sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
1323         if (!adapter->tx_pool)
1324                 return -ENOMEM;
1325
1326         adapter->tso_pool = kcalloc(num_pools,
1327                                     sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
1328         /* To simplify release_tx_pools() ensure that ->tx_pool and
1329          * ->tso_pool are either both NULL or both non-NULL.
1330          */
1331         if (!adapter->tso_pool) {
1332                 kfree(adapter->tx_pool);
1333                 adapter->tx_pool = NULL;
1334                 return -ENOMEM;
1335         }
1336
1337         /* Set num_active_tx_pools early. If we fail below after partial
1338          * allocation, release_tx_pools() will know how many to look for.
1339          */
1340         adapter->num_active_tx_pools = num_pools;
1341
1342         buff_size = adapter->req_mtu + VLAN_HLEN;
1343         buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
1344
1345         for (i = 0; i < num_pools; i++) {
1346                 dev_dbg(dev, "Init tx pool %d [%llu, %llu]\n",
1347                         i, adapter->req_tx_entries_per_subcrq, buff_size);
1348
1349                 rc = init_one_tx_pool(netdev, &adapter->tx_pool[i],
1350                                       pool_size, buff_size);
1351                 if (rc)
1352                         goto out_release;
1353
1354                 rc = init_one_tx_pool(netdev, &adapter->tso_pool[i],
1355                                       IBMVNIC_TSO_BUFS,
1356                                       IBMVNIC_TSO_BUF_SZ);
1357                 if (rc)
1358                         goto out_release;
1359         }
1360
1361         adapter->prev_tx_pool_size = pool_size;
1362         adapter->prev_mtu = adapter->req_mtu;
1363
1364 update_ltb:
1365         /* NOTE: All tx_pools have the same number of buffers (which is
1366          *       same as pool_size). All tso_pools have IBMVNIC_TSO_BUFS
1367          *       buffers (see calls init_one_tx_pool() for these).
1368          *       For consistency, we use tx_pool->num_buffers and
1369          *       tso_pool->num_buffers below.
1370          */
1371         rc = -1;
1372         for (i = 0; i < num_pools; i++) {
1373                 struct ibmvnic_tx_pool *tso_pool;
1374                 struct ibmvnic_tx_pool *tx_pool;
1375
1376                 tx_pool = &adapter->tx_pool[i];
1377
1378                 dev_dbg(dev, "Updating LTB for tx pool %d [%d, %d]\n",
1379                         i, tx_pool->num_buffers, tx_pool->buf_size);
1380
1381                 rc = alloc_ltb_set(adapter, &tx_pool->ltb_set,
1382                                    tx_pool->num_buffers, tx_pool->buf_size);
1383                 if (rc)
1384                         goto out;
1385
1386                 tx_pool->consumer_index = 0;
1387                 tx_pool->producer_index = 0;
1388
1389                 for (j = 0; j < tx_pool->num_buffers; j++)
1390                         tx_pool->free_map[j] = j;
1391
1392                 tso_pool = &adapter->tso_pool[i];
1393
1394                 dev_dbg(dev, "Updating LTB for tso pool %d [%d, %d]\n",
1395                         i, tso_pool->num_buffers, tso_pool->buf_size);
1396
1397                 rc = alloc_ltb_set(adapter, &tso_pool->ltb_set,
1398                                    tso_pool->num_buffers, tso_pool->buf_size);
1399                 if (rc)
1400                         goto out;
1401
1402                 tso_pool->consumer_index = 0;
1403                 tso_pool->producer_index = 0;
1404
1405                 for (j = 0; j < tso_pool->num_buffers; j++)
1406                         tso_pool->free_map[j] = j;
1407         }
1408
1409         return 0;
1410 out_release:
1411         release_tx_pools(adapter);
1412 out:
1413         /* We failed to allocate one or more LTBs or map them on the VIOS.
1414          * Hold onto the pools and any LTBs that we did allocate/map.
1415          */
1416         return rc;
1417 }
1418
1419 static void ibmvnic_napi_enable(struct ibmvnic_adapter *adapter)
1420 {
1421         int i;
1422
1423         if (adapter->napi_enabled)
1424                 return;
1425
1426         for (i = 0; i < adapter->req_rx_queues; i++)
1427                 napi_enable(&adapter->napi[i]);
1428
1429         adapter->napi_enabled = true;
1430 }
1431
1432 static void ibmvnic_napi_disable(struct ibmvnic_adapter *adapter)
1433 {
1434         int i;
1435
1436         if (!adapter->napi_enabled)
1437                 return;
1438
1439         for (i = 0; i < adapter->req_rx_queues; i++) {
1440                 netdev_dbg(adapter->netdev, "Disabling napi[%d]\n", i);
1441                 napi_disable(&adapter->napi[i]);
1442         }
1443
1444         adapter->napi_enabled = false;
1445 }
1446
1447 static int init_napi(struct ibmvnic_adapter *adapter)
1448 {
1449         int i;
1450
1451         adapter->napi = kcalloc(adapter->req_rx_queues,
1452                                 sizeof(struct napi_struct), GFP_KERNEL);
1453         if (!adapter->napi)
1454                 return -ENOMEM;
1455
1456         for (i = 0; i < adapter->req_rx_queues; i++) {
1457                 netdev_dbg(adapter->netdev, "Adding napi[%d]\n", i);
1458                 netif_napi_add(adapter->netdev, &adapter->napi[i],
1459                                ibmvnic_poll);
1460         }
1461
1462         adapter->num_active_rx_napi = adapter->req_rx_queues;
1463         return 0;
1464 }
1465
1466 static void release_napi(struct ibmvnic_adapter *adapter)
1467 {
1468         int i;
1469
1470         if (!adapter->napi)
1471                 return;
1472
1473         for (i = 0; i < adapter->num_active_rx_napi; i++) {
1474                 netdev_dbg(adapter->netdev, "Releasing napi[%d]\n", i);
1475                 netif_napi_del(&adapter->napi[i]);
1476         }
1477
1478         kfree(adapter->napi);
1479         adapter->napi = NULL;
1480         adapter->num_active_rx_napi = 0;
1481         adapter->napi_enabled = false;
1482 }
1483
1484 static const char *adapter_state_to_string(enum vnic_state state)
1485 {
1486         switch (state) {
1487         case VNIC_PROBING:
1488                 return "PROBING";
1489         case VNIC_PROBED:
1490                 return "PROBED";
1491         case VNIC_OPENING:
1492                 return "OPENING";
1493         case VNIC_OPEN:
1494                 return "OPEN";
1495         case VNIC_CLOSING:
1496                 return "CLOSING";
1497         case VNIC_CLOSED:
1498                 return "CLOSED";
1499         case VNIC_REMOVING:
1500                 return "REMOVING";
1501         case VNIC_REMOVED:
1502                 return "REMOVED";
1503         case VNIC_DOWN:
1504                 return "DOWN";
1505         }
1506         return "UNKNOWN";
1507 }
1508
1509 static int ibmvnic_login(struct net_device *netdev)
1510 {
1511         unsigned long flags, timeout = msecs_to_jiffies(20000);
1512         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
1513         int retry_count = 0;
1514         int retries = 10;
1515         bool retry;
1516         int rc;
1517
1518         do {
1519                 retry = false;
1520                 if (retry_count > retries) {
1521                         netdev_warn(netdev, "Login attempts exceeded\n");
1522                         return -EACCES;
1523                 }
1524
1525                 adapter->init_done_rc = 0;
1526                 reinit_completion(&adapter->init_done);
1527                 rc = send_login(adapter);
1528                 if (rc)
1529                         return rc;
1530
1531                 if (!wait_for_completion_timeout(&adapter->init_done,
1532                                                  timeout)) {
1533                         netdev_warn(netdev, "Login timed out\n");
1534                         adapter->login_pending = false;
1535                         goto partial_reset;
1536                 }
1537
1538                 if (adapter->init_done_rc == ABORTED) {
1539                         netdev_warn(netdev, "Login aborted, retrying...\n");
1540                         retry = true;
1541                         adapter->init_done_rc = 0;
1542                         retry_count++;
1543                         /* FW or device may be busy, so
1544                          * wait a bit before retrying login
1545                          */
1546                         msleep(500);
1547                 } else if (adapter->init_done_rc == PARTIALSUCCESS) {
1548                         retry_count++;
1549                         release_sub_crqs(adapter, 1);
1550
1551                         retry = true;
1552                         netdev_dbg(netdev,
1553                                    "Received partial success, retrying...\n");
1554                         adapter->init_done_rc = 0;
1555                         reinit_completion(&adapter->init_done);
1556                         send_query_cap(adapter);
1557                         if (!wait_for_completion_timeout(&adapter->init_done,
1558                                                          timeout)) {
1559                                 netdev_warn(netdev,
1560                                             "Capabilities query timed out\n");
1561                                 return -ETIMEDOUT;
1562                         }
1563
1564                         rc = init_sub_crqs(adapter);
1565                         if (rc) {
1566                                 netdev_warn(netdev,
1567                                             "SCRQ initialization failed\n");
1568                                 return rc;
1569                         }
1570
1571                         rc = init_sub_crq_irqs(adapter);
1572                         if (rc) {
1573                                 netdev_warn(netdev,
1574                                             "SCRQ irq initialization failed\n");
1575                                 return rc;
1576                         }
1577                 /* Default/timeout error handling, reset and start fresh */
1578                 } else if (adapter->init_done_rc) {
1579                         netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
1580                                     adapter->init_done_rc);
1581
1582 partial_reset:
1583                         /* adapter login failed, so free any CRQs or sub-CRQs
1584                          * and register again before attempting to login again.
1585                          * If we don't do this then the VIOS may think that
1586                          * we are already logged in and reject any subsequent
1587                          * attempts
1588                          */
1589                         netdev_warn(netdev,
1590                                     "Freeing and re-registering CRQs before attempting to login again\n");
1591                         retry = true;
1592                         adapter->init_done_rc = 0;
1593                         release_sub_crqs(adapter, true);
1594                         /* Much of this is similar logic as ibmvnic_probe(),
1595                          * we are essentially re-initializing communication
1596                          * with the server. We really should not run any
1597                          * resets/failovers here because this is already a form
1598                          * of reset and we do not want parallel resets occurring
1599                          */
1600                         do {
1601                                 reinit_init_done(adapter);
1602                                 /* Clear any failovers we got in the previous
1603                                  * pass since we are re-initializing the CRQ
1604                                  */
1605                                 adapter->failover_pending = false;
1606                                 release_crq_queue(adapter);
1607                                 /* If we don't sleep here then we risk an
1608                                  * unnecessary failover event from the VIOS.
1609                                  * This is a known VIOS issue caused by a vnic
1610                                  * device freeing and registering a CRQ too
1611                                  * quickly.
1612                                  */
1613                                 msleep(1500);
1614                                 /* Avoid any resets, since we are currently
1615                                  * resetting.
1616                                  */
1617                                 spin_lock_irqsave(&adapter->rwi_lock, flags);
1618                                 flush_reset_queue(adapter);
1619                                 spin_unlock_irqrestore(&adapter->rwi_lock,
1620                                                        flags);
1621
1622                                 rc = init_crq_queue(adapter);
1623                                 if (rc) {
1624                                         netdev_err(netdev, "login recovery: init CRQ failed %d\n",
1625                                                    rc);
1626                                         return -EIO;
1627                                 }
1628
1629                                 rc = ibmvnic_reset_init(adapter, false);
1630                                 if (rc)
1631                                         netdev_err(netdev, "login recovery: Reset init failed %d\n",
1632                                                    rc);
1633                                 /* IBMVNIC_CRQ_INIT will return EAGAIN if it
1634                                  * fails, since ibmvnic_reset_init will free
1635                                  * irq's in failure, we won't be able to receive
1636                                  * new CRQs so we need to keep trying. probe()
1637                                  * handles this similarly.
1638                                  */
1639                         } while (rc == -EAGAIN && retry_count++ < retries);
1640                 }
1641         } while (retry);
1642
1643         __ibmvnic_set_mac(netdev, adapter->mac_addr);
1644
1645         netdev_dbg(netdev, "[S:%s] Login succeeded\n", adapter_state_to_string(adapter->state));
1646         return 0;
1647 }
1648
1649 static void release_login_buffer(struct ibmvnic_adapter *adapter)
1650 {
1651         if (!adapter->login_buf)
1652                 return;
1653
1654         dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token,
1655                          adapter->login_buf_sz, DMA_TO_DEVICE);
1656         kfree(adapter->login_buf);
1657         adapter->login_buf = NULL;
1658 }
1659
1660 static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
1661 {
1662         if (!adapter->login_rsp_buf)
1663                 return;
1664
1665         dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token,
1666                          adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
1667         kfree(adapter->login_rsp_buf);
1668         adapter->login_rsp_buf = NULL;
1669 }
1670
/* Release the VPD data, the NAPI state and both login buffers, in that
 * order.
 */
static void release_resources(struct ibmvnic_adapter *adapter)
{
	release_vpd_data(adapter);
	release_napi(adapter);

	/* Login request buffer first, then the response buffer */
	release_login_buffer(adapter);
	release_login_rsp_buffer(adapter);
}
1679
1680 static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
1681 {
1682         struct net_device *netdev = adapter->netdev;
1683         unsigned long timeout = msecs_to_jiffies(20000);
1684         union ibmvnic_crq crq;
1685         bool resend;
1686         int rc;
1687
1688         netdev_dbg(netdev, "setting link state %d\n", link_state);
1689
1690         memset(&crq, 0, sizeof(crq));
1691         crq.logical_link_state.first = IBMVNIC_CRQ_CMD;
1692         crq.logical_link_state.cmd = LOGICAL_LINK_STATE;
1693         crq.logical_link_state.link_state = link_state;
1694
1695         do {
1696                 resend = false;
1697
1698                 reinit_completion(&adapter->init_done);
1699                 rc = ibmvnic_send_crq(adapter, &crq);
1700                 if (rc) {
1701                         netdev_err(netdev, "Failed to set link state\n");
1702                         return rc;
1703                 }
1704
1705                 if (!wait_for_completion_timeout(&adapter->init_done,
1706                                                  timeout)) {
1707                         netdev_err(netdev, "timeout setting link state\n");
1708                         return -ETIMEDOUT;
1709                 }
1710
1711                 if (adapter->init_done_rc == PARTIALSUCCESS) {
1712                         /* Partuial success, delay and re-send */
1713                         mdelay(1000);
1714                         resend = true;
1715                 } else if (adapter->init_done_rc) {
1716                         netdev_warn(netdev, "Unable to set link state, rc=%d\n",
1717                                     adapter->init_done_rc);
1718                         return adapter->init_done_rc;
1719                 }
1720         } while (resend);
1721
1722         return 0;
1723 }
1724
1725 static int set_real_num_queues(struct net_device *netdev)
1726 {
1727         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
1728         int rc;
1729
1730         netdev_dbg(netdev, "Setting real tx/rx queues (%llx/%llx)\n",
1731                    adapter->req_tx_queues, adapter->req_rx_queues);
1732
1733         rc = netif_set_real_num_tx_queues(netdev, adapter->req_tx_queues);
1734         if (rc) {
1735                 netdev_err(netdev, "failed to set the number of tx queues\n");
1736                 return rc;
1737         }
1738
1739         rc = netif_set_real_num_rx_queues(netdev, adapter->req_rx_queues);
1740         if (rc)
1741                 netdev_err(netdev, "failed to set the number of rx queues\n");
1742
1743         return rc;
1744 }
1745
1746 static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
1747 {
1748         struct device *dev = &adapter->vdev->dev;
1749         union ibmvnic_crq crq;
1750         int len = 0;
1751         int rc;
1752
1753         if (adapter->vpd->buff)
1754                 len = adapter->vpd->len;
1755
1756         mutex_lock(&adapter->fw_lock);
1757         adapter->fw_done_rc = 0;
1758         reinit_completion(&adapter->fw_done);
1759
1760         crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
1761         crq.get_vpd_size.cmd = GET_VPD_SIZE;
1762         rc = ibmvnic_send_crq(adapter, &crq);
1763         if (rc) {
1764                 mutex_unlock(&adapter->fw_lock);
1765                 return rc;
1766         }
1767
1768         rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
1769         if (rc) {
1770                 dev_err(dev, "Could not retrieve VPD size, rc = %d\n", rc);
1771                 mutex_unlock(&adapter->fw_lock);
1772                 return rc;
1773         }
1774         mutex_unlock(&adapter->fw_lock);
1775
1776         if (!adapter->vpd->len)
1777                 return -ENODATA;
1778
1779         if (!adapter->vpd->buff)
1780                 adapter->vpd->buff = kzalloc(adapter->vpd->len, GFP_KERNEL);
1781         else if (adapter->vpd->len != len)
1782                 adapter->vpd->buff =
1783                         krealloc(adapter->vpd->buff,
1784                                  adapter->vpd->len, GFP_KERNEL);
1785
1786         if (!adapter->vpd->buff) {
1787                 dev_err(dev, "Could allocate VPD buffer\n");
1788                 return -ENOMEM;
1789         }
1790
1791         adapter->vpd->dma_addr =
1792                 dma_map_single(dev, adapter->vpd->buff, adapter->vpd->len,
1793                                DMA_FROM_DEVICE);
1794         if (dma_mapping_error(dev, adapter->vpd->dma_addr)) {
1795                 dev_err(dev, "Could not map VPD buffer\n");
1796                 kfree(adapter->vpd->buff);
1797                 adapter->vpd->buff = NULL;
1798                 return -ENOMEM;
1799         }
1800
1801         mutex_lock(&adapter->fw_lock);
1802         adapter->fw_done_rc = 0;
1803         reinit_completion(&adapter->fw_done);
1804
1805         crq.get_vpd.first = IBMVNIC_CRQ_CMD;
1806         crq.get_vpd.cmd = GET_VPD;
1807         crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr);
1808         crq.get_vpd.len = cpu_to_be32((u32)adapter->vpd->len);
1809         rc = ibmvnic_send_crq(adapter, &crq);
1810         if (rc) {
1811                 kfree(adapter->vpd->buff);
1812                 adapter->vpd->buff = NULL;
1813                 mutex_unlock(&adapter->fw_lock);
1814                 return rc;
1815         }
1816
1817         rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
1818         if (rc) {
1819                 dev_err(dev, "Unable to retrieve VPD, rc = %d\n", rc);
1820                 kfree(adapter->vpd->buff);
1821                 adapter->vpd->buff = NULL;
1822                 mutex_unlock(&adapter->fw_lock);
1823                 return rc;
1824         }
1825
1826         mutex_unlock(&adapter->fw_lock);
1827         return 0;
1828 }
1829
1830 static int init_resources(struct ibmvnic_adapter *adapter)
1831 {
1832         struct net_device *netdev = adapter->netdev;
1833         int rc;
1834
1835         rc = set_real_num_queues(netdev);
1836         if (rc)
1837                 return rc;
1838
1839         adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
1840         if (!adapter->vpd)
1841                 return -ENOMEM;
1842
1843         /* Vital Product Data (VPD) */
1844         rc = ibmvnic_get_vpd(adapter);
1845         if (rc) {
1846                 netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n");
1847                 return rc;
1848         }
1849
1850         rc = init_napi(adapter);
1851         if (rc)
1852                 return rc;
1853
1854         send_query_map(adapter);
1855
1856         rc = init_rx_pools(netdev);
1857         if (rc)
1858                 return rc;
1859
1860         rc = init_tx_pools(netdev);
1861         return rc;
1862 }
1863
1864 static int __ibmvnic_open(struct net_device *netdev)
1865 {
1866         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
1867         enum vnic_state prev_state = adapter->state;
1868         int i, rc;
1869
1870         adapter->state = VNIC_OPENING;
1871         replenish_pools(adapter);
1872         ibmvnic_napi_enable(adapter);
1873
1874         /* We're ready to receive frames, enable the sub-crq interrupts and
1875          * set the logical link state to up
1876          */
1877         for (i = 0; i < adapter->req_rx_queues; i++) {
1878                 netdev_dbg(netdev, "Enabling rx_scrq[%d] irq\n", i);
1879                 if (prev_state == VNIC_CLOSED)
1880                         enable_irq(adapter->rx_scrq[i]->irq);
1881                 enable_scrq_irq(adapter, adapter->rx_scrq[i]);
1882         }
1883
1884         for (i = 0; i < adapter->req_tx_queues; i++) {
1885                 netdev_dbg(netdev, "Enabling tx_scrq[%d] irq\n", i);
1886                 if (prev_state == VNIC_CLOSED)
1887                         enable_irq(adapter->tx_scrq[i]->irq);
1888                 enable_scrq_irq(adapter, adapter->tx_scrq[i]);
1889                 /* netdev_tx_reset_queue will reset dql stats. During NON_FATAL
1890                  * resets, don't reset the stats because there could be batched
1891                  * skb's waiting to be sent. If we reset dql stats, we risk
1892                  * num_completed being greater than num_queued. This will cause
1893                  * a BUG_ON in dql_completed().
1894                  */
1895                 if (adapter->reset_reason != VNIC_RESET_NON_FATAL)
1896                         netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i));
1897         }
1898
1899         rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
1900         if (rc) {
1901                 ibmvnic_napi_disable(adapter);
1902                 ibmvnic_disable_irqs(adapter);
1903                 return rc;
1904         }
1905
1906         adapter->tx_queues_active = true;
1907
1908         /* Since queues were stopped until now, there shouldn't be any
1909          * one in ibmvnic_complete_tx() or ibmvnic_xmit() so maybe we
1910          * don't need the synchronize_rcu()? Leaving it for consistency
1911          * with setting ->tx_queues_active = false.
1912          */
1913         synchronize_rcu();
1914
1915         netif_tx_start_all_queues(netdev);
1916
1917         if (prev_state == VNIC_CLOSED) {
1918                 for (i = 0; i < adapter->req_rx_queues; i++)
1919                         napi_schedule(&adapter->napi[i]);
1920         }
1921
1922         adapter->state = VNIC_OPEN;
1923         return rc;
1924 }
1925
1926 static int ibmvnic_open(struct net_device *netdev)
1927 {
1928         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
1929         int rc;
1930
1931         ASSERT_RTNL();
1932
1933         /* If device failover is pending or we are about to reset, just set
1934          * device state and return. Device operation will be handled by reset
1935          * routine.
1936          *
1937          * It should be safe to overwrite the adapter->state here. Since
1938          * we hold the rtnl, either the reset has not actually started or
1939          * the rtnl got dropped during the set_link_state() in do_reset().
1940          * In the former case, no one else is changing the state (again we
1941          * have the rtnl) and in the latter case, do_reset() will detect and
1942          * honor our setting below.
1943          */
1944         if (adapter->failover_pending || (test_bit(0, &adapter->resetting))) {
1945                 netdev_dbg(netdev, "[S:%s FOP:%d] Resetting, deferring open\n",
1946                            adapter_state_to_string(adapter->state),
1947                            adapter->failover_pending);
1948                 adapter->state = VNIC_OPEN;
1949                 rc = 0;
1950                 goto out;
1951         }
1952
1953         if (adapter->state != VNIC_CLOSED) {
1954                 rc = ibmvnic_login(netdev);
1955                 if (rc)
1956                         goto out;
1957
1958                 rc = init_resources(adapter);
1959                 if (rc) {
1960                         netdev_err(netdev, "failed to initialize resources\n");
1961                         goto out;
1962                 }
1963         }
1964
1965         rc = __ibmvnic_open(netdev);
1966
1967 out:
1968         /* If open failed and there is a pending failover or in-progress reset,
1969          * set device state and return. Device operation will be handled by
1970          * reset routine. See also comments above regarding rtnl.
1971          */
1972         if (rc &&
1973             (adapter->failover_pending || (test_bit(0, &adapter->resetting)))) {
1974                 adapter->state = VNIC_OPEN;
1975                 rc = 0;
1976         }
1977
1978         if (rc) {
1979                 release_resources(adapter);
1980                 release_rx_pools(adapter);
1981                 release_tx_pools(adapter);
1982         }
1983
1984         return rc;
1985 }
1986
1987 static void clean_rx_pools(struct ibmvnic_adapter *adapter)
1988 {
1989         struct ibmvnic_rx_pool *rx_pool;
1990         struct ibmvnic_rx_buff *rx_buff;
1991         u64 rx_entries;
1992         int rx_scrqs;
1993         int i, j;
1994
1995         if (!adapter->rx_pool)
1996                 return;
1997
1998         rx_scrqs = adapter->num_active_rx_pools;
1999         rx_entries = adapter->req_rx_add_entries_per_subcrq;
2000
2001         /* Free any remaining skbs in the rx buffer pools */
2002         for (i = 0; i < rx_scrqs; i++) {
2003                 rx_pool = &adapter->rx_pool[i];
2004                 if (!rx_pool || !rx_pool->rx_buff)
2005                         continue;
2006
2007                 netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i);
2008                 for (j = 0; j < rx_entries; j++) {
2009                         rx_buff = &rx_pool->rx_buff[j];
2010                         if (rx_buff && rx_buff->skb) {
2011                                 dev_kfree_skb_any(rx_buff->skb);
2012                                 rx_buff->skb = NULL;
2013                         }
2014                 }
2015         }
2016 }
2017
2018 static void clean_one_tx_pool(struct ibmvnic_adapter *adapter,
2019                               struct ibmvnic_tx_pool *tx_pool)
2020 {
2021         struct ibmvnic_tx_buff *tx_buff;
2022         u64 tx_entries;
2023         int i;
2024
2025         if (!tx_pool || !tx_pool->tx_buff)
2026                 return;
2027
2028         tx_entries = tx_pool->num_buffers;
2029
2030         for (i = 0; i < tx_entries; i++) {
2031                 tx_buff = &tx_pool->tx_buff[i];
2032                 if (tx_buff && tx_buff->skb) {
2033                         dev_kfree_skb_any(tx_buff->skb);
2034                         tx_buff->skb = NULL;
2035                 }
2036         }
2037 }
2038
2039 static void clean_tx_pools(struct ibmvnic_adapter *adapter)
2040 {
2041         int tx_scrqs;
2042         int i;
2043
2044         if (!adapter->tx_pool || !adapter->tso_pool)
2045                 return;
2046
2047         tx_scrqs = adapter->num_active_tx_pools;
2048
2049         /* Free any remaining skbs in the tx buffer pools */
2050         for (i = 0; i < tx_scrqs; i++) {
2051                 netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i);
2052                 clean_one_tx_pool(adapter, &adapter->tx_pool[i]);
2053                 clean_one_tx_pool(adapter, &adapter->tso_pool[i]);
2054         }
2055 }
2056
2057 static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter)
2058 {
2059         struct net_device *netdev = adapter->netdev;
2060         int i;
2061
2062         if (adapter->tx_scrq) {
2063                 for (i = 0; i < adapter->req_tx_queues; i++)
2064                         if (adapter->tx_scrq[i]->irq) {
2065                                 netdev_dbg(netdev,
2066                                            "Disabling tx_scrq[%d] irq\n", i);
2067                                 disable_scrq_irq(adapter, adapter->tx_scrq[i]);
2068                                 disable_irq(adapter->tx_scrq[i]->irq);
2069                         }
2070         }
2071
2072         if (adapter->rx_scrq) {
2073                 for (i = 0; i < adapter->req_rx_queues; i++) {
2074                         if (adapter->rx_scrq[i]->irq) {
2075                                 netdev_dbg(netdev,
2076                                            "Disabling rx_scrq[%d] irq\n", i);
2077                                 disable_scrq_irq(adapter, adapter->rx_scrq[i]);
2078                                 disable_irq(adapter->rx_scrq[i]->irq);
2079                         }
2080                 }
2081         }
2082 }
2083
2084 static void ibmvnic_cleanup(struct net_device *netdev)
2085 {
2086         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
2087
2088         /* ensure that transmissions are stopped if called by do_reset */
2089
2090         adapter->tx_queues_active = false;
2091
2092         /* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active
2093          * update so they don't restart a queue after we stop it below.
2094          */
2095         synchronize_rcu();
2096
2097         if (test_bit(0, &adapter->resetting))
2098                 netif_tx_disable(netdev);
2099         else
2100                 netif_tx_stop_all_queues(netdev);
2101
2102         ibmvnic_napi_disable(adapter);
2103         ibmvnic_disable_irqs(adapter);
2104 }
2105
2106 static int __ibmvnic_close(struct net_device *netdev)
2107 {
2108         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
2109         int rc = 0;
2110
2111         adapter->state = VNIC_CLOSING;
2112         rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
2113         adapter->state = VNIC_CLOSED;
2114         return rc;
2115 }
2116
2117 static int ibmvnic_close(struct net_device *netdev)
2118 {
2119         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
2120         int rc;
2121
2122         netdev_dbg(netdev, "[S:%s FOP:%d FRR:%d] Closing\n",
2123                    adapter_state_to_string(adapter->state),
2124                    adapter->failover_pending,
2125                    adapter->force_reset_recovery);
2126
2127         /* If device failover is pending, just set device state and return.
2128          * Device operation will be handled by reset routine.
2129          */
2130         if (adapter->failover_pending) {
2131                 adapter->state = VNIC_CLOSED;
2132                 return 0;
2133         }
2134
2135         rc = __ibmvnic_close(netdev);
2136         ibmvnic_cleanup(netdev);
2137         clean_rx_pools(adapter);
2138         clean_tx_pools(adapter);
2139
2140         return rc;
2141 }
2142
2143 /**
2144  * get_hdr_lens - fills list of L2/L3/L4 hdr lens
2145  * @hdr_field: bitfield determining needed headers
2146  * @skb: socket buffer
2147  * @hdr_len: array of header lengths to be filled
2148  *
2149  * Reads hdr_field to determine which headers are needed by firmware.
2150  * Builds a buffer containing these headers.  Saves individual header
2151  * lengths and total buffer length to be used to build descriptors.
2152  *
2153  * Return: total len of all headers
2154  */
2155 static int get_hdr_lens(u8 hdr_field, struct sk_buff *skb,
2156                         int *hdr_len)
2157 {
2158         int len = 0;
2159
2160
2161         if ((hdr_field >> 6) & 1) {
2162                 hdr_len[0] = skb_mac_header_len(skb);
2163                 len += hdr_len[0];
2164         }
2165
2166         if ((hdr_field >> 5) & 1) {
2167                 hdr_len[1] = skb_network_header_len(skb);
2168                 len += hdr_len[1];
2169         }
2170
2171         if (!((hdr_field >> 4) & 1))
2172                 return len;
2173
2174         if (skb->protocol == htons(ETH_P_IP)) {
2175                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
2176                         hdr_len[2] = tcp_hdrlen(skb);
2177                 else if (ip_hdr(skb)->protocol == IPPROTO_UDP)
2178                         hdr_len[2] = sizeof(struct udphdr);
2179         } else if (skb->protocol == htons(ETH_P_IPV6)) {
2180                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
2181                         hdr_len[2] = tcp_hdrlen(skb);
2182                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)
2183                         hdr_len[2] = sizeof(struct udphdr);
2184         }
2185
2186         return len + hdr_len[2];
2187 }
2188
2189 /**
2190  * create_hdr_descs - create header and header extension descriptors
2191  * @hdr_field: bitfield determining needed headers
2192  * @hdr_data: buffer containing header data
2193  * @len: length of data buffer
2194  * @hdr_len: array of individual header lengths
2195  * @scrq_arr: descriptor array
2196  *
2197  * Creates header and, if needed, header extension descriptors and
2198  * places them in a descriptor array, scrq_arr
2199  *
2200  * Return: Number of header descs
2201  */
2202
2203 static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len,
2204                             union sub_crq *scrq_arr)
2205 {
2206         union sub_crq *hdr_desc;
2207         int tmp_len = len;
2208         int num_descs = 0;
2209         u8 *data, *cur;
2210         int tmp;
2211
2212         while (tmp_len > 0) {
2213                 cur = hdr_data + len - tmp_len;
2214
2215                 hdr_desc = &scrq_arr[num_descs];
2216                 if (num_descs) {
2217                         data = hdr_desc->hdr_ext.data;
2218                         tmp = tmp_len > 29 ? 29 : tmp_len;
2219                         hdr_desc->hdr_ext.first = IBMVNIC_CRQ_CMD;
2220                         hdr_desc->hdr_ext.type = IBMVNIC_HDR_EXT_DESC;
2221                         hdr_desc->hdr_ext.len = tmp;
2222                 } else {
2223                         data = hdr_desc->hdr.data;
2224                         tmp = tmp_len > 24 ? 24 : tmp_len;
2225                         hdr_desc->hdr.first = IBMVNIC_CRQ_CMD;
2226                         hdr_desc->hdr.type = IBMVNIC_HDR_DESC;
2227                         hdr_desc->hdr.len = tmp;
2228                         hdr_desc->hdr.l2_len = (u8)hdr_len[0];
2229                         hdr_desc->hdr.l3_len = cpu_to_be16((u16)hdr_len[1]);
2230                         hdr_desc->hdr.l4_len = (u8)hdr_len[2];
2231                         hdr_desc->hdr.flag = hdr_field << 1;
2232                 }
2233                 memcpy(data, cur, tmp);
2234                 tmp_len -= tmp;
2235                 num_descs++;
2236         }
2237
2238         return num_descs;
2239 }
2240
2241 /**
2242  * build_hdr_descs_arr - build a header descriptor array
2243  * @skb: tx socket buffer
2244  * @indir_arr: indirect array
2245  * @num_entries: number of descriptors to be sent
2246  * @hdr_field: bit field determining which headers will be sent
2247  *
2248  * This function will build a TX descriptor array with applicable
2249  * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect.
2250  */
2251
2252 static void build_hdr_descs_arr(struct sk_buff *skb,
2253                                 union sub_crq *indir_arr,
2254                                 int *num_entries, u8 hdr_field)
2255 {
2256         int hdr_len[3] = {0, 0, 0};
2257         int tot_len;
2258
2259         tot_len = get_hdr_lens(hdr_field, skb, hdr_len);
2260         *num_entries += create_hdr_descs(hdr_field, skb_mac_header(skb),
2261                                          tot_len, hdr_len, indir_arr + 1);
2262 }
2263
2264 static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
2265                                     struct net_device *netdev)
2266 {
2267         /* For some backing devices, mishandling of small packets
2268          * can result in a loss of connection or TX stall. Device
2269          * architects recommend that no packet should be smaller
2270          * than the minimum MTU value provided to the driver, so
2271          * pad any packets to that length
2272          */
2273         if (skb->len < netdev->min_mtu)
2274                 return skb_put_padto(skb, netdev->min_mtu);
2275
2276         return 0;
2277 }
2278
2279 static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
2280                                          struct ibmvnic_sub_crq_queue *tx_scrq)
2281 {
2282         struct ibmvnic_ind_xmit_queue *ind_bufp;
2283         struct ibmvnic_tx_buff *tx_buff;
2284         struct ibmvnic_tx_pool *tx_pool;
2285         union sub_crq tx_scrq_entry;
2286         int queue_num;
2287         int entries;
2288         int index;
2289         int i;
2290
2291         ind_bufp = &tx_scrq->ind_buf;
2292         entries = (u64)ind_bufp->index;
2293         queue_num = tx_scrq->pool_index;
2294
2295         for (i = entries - 1; i >= 0; --i) {
2296                 tx_scrq_entry = ind_bufp->indir_arr[i];
2297                 if (tx_scrq_entry.v1.type != IBMVNIC_TX_DESC)
2298                         continue;
2299                 index = be32_to_cpu(tx_scrq_entry.v1.correlator);
2300                 if (index & IBMVNIC_TSO_POOL_MASK) {
2301                         tx_pool = &adapter->tso_pool[queue_num];
2302                         index &= ~IBMVNIC_TSO_POOL_MASK;
2303                 } else {
2304                         tx_pool = &adapter->tx_pool[queue_num];
2305                 }
2306                 tx_pool->free_map[tx_pool->consumer_index] = index;
2307                 tx_pool->consumer_index = tx_pool->consumer_index == 0 ?
2308                                           tx_pool->num_buffers - 1 :
2309                                           tx_pool->consumer_index - 1;
2310                 tx_buff = &tx_pool->tx_buff[index];
2311                 adapter->netdev->stats.tx_packets--;
2312                 adapter->netdev->stats.tx_bytes -= tx_buff->skb->len;
2313                 adapter->tx_stats_buffers[queue_num].batched_packets--;
2314                 adapter->tx_stats_buffers[queue_num].bytes -=
2315                                                 tx_buff->skb->len;
2316                 dev_kfree_skb_any(tx_buff->skb);
2317                 tx_buff->skb = NULL;
2318                 adapter->netdev->stats.tx_dropped++;
2319         }
2320
2321         ind_bufp->index = 0;
2322
2323         if (atomic_sub_return(entries, &tx_scrq->used) <=
2324             (adapter->req_tx_entries_per_subcrq / 2) &&
2325             __netif_subqueue_stopped(adapter->netdev, queue_num)) {
2326                 rcu_read_lock();
2327
2328                 if (adapter->tx_queues_active) {
2329                         netif_wake_subqueue(adapter->netdev, queue_num);
2330                         netdev_dbg(adapter->netdev, "Started queue %d\n",
2331                                    queue_num);
2332                 }
2333
2334                 rcu_read_unlock();
2335         }
2336 }
2337
2338 static int send_subcrq_direct(struct ibmvnic_adapter *adapter,
2339                               u64 remote_handle, u64 *entry)
2340 {
2341         unsigned int ua = adapter->vdev->unit_address;
2342         struct device *dev = &adapter->vdev->dev;
2343         int rc;
2344
2345         /* Make sure the hypervisor sees the complete request */
2346         dma_wmb();
2347         rc = plpar_hcall_norets(H_SEND_SUB_CRQ, ua,
2348                                 cpu_to_be64(remote_handle),
2349                                 cpu_to_be64(entry[0]), cpu_to_be64(entry[1]),
2350                                 cpu_to_be64(entry[2]), cpu_to_be64(entry[3]));
2351
2352         if (rc)
2353                 print_subcrq_error(dev, rc, __func__);
2354
2355         return rc;
2356 }
2357
2358 static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter,
2359                                  struct ibmvnic_sub_crq_queue *tx_scrq,
2360                                  bool indirect)
2361 {
2362         struct ibmvnic_ind_xmit_queue *ind_bufp;
2363         u64 dma_addr;
2364         u64 entries;
2365         u64 handle;
2366         int rc;
2367
2368         ind_bufp = &tx_scrq->ind_buf;
2369         dma_addr = (u64)ind_bufp->indir_dma;
2370         entries = (u64)ind_bufp->index;
2371         handle = tx_scrq->handle;
2372
2373         if (!entries)
2374                 return 0;
2375
2376         if (indirect)
2377                 rc = send_subcrq_indirect(adapter, handle, dma_addr, entries);
2378         else
2379                 rc = send_subcrq_direct(adapter, handle,
2380                                         (u64 *)ind_bufp->indir_arr);
2381
2382         if (rc)
2383                 ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq);
2384         else
2385                 ind_bufp->index = 0;
2386         return rc;
2387 }
2388
2389 static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
2390 {
2391         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
2392         int queue_num = skb_get_queue_mapping(skb);
2393         u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
2394         struct device *dev = &adapter->vdev->dev;
2395         struct ibmvnic_ind_xmit_queue *ind_bufp;
2396         struct ibmvnic_tx_buff *tx_buff = NULL;
2397         struct ibmvnic_sub_crq_queue *tx_scrq;
2398         struct ibmvnic_long_term_buff *ltb;
2399         struct ibmvnic_tx_pool *tx_pool;
2400         unsigned int tx_send_failed = 0;
2401         netdev_tx_t ret = NETDEV_TX_OK;
2402         unsigned int tx_map_failed = 0;
2403         union sub_crq indir_arr[16];
2404         unsigned int tx_dropped = 0;
2405         unsigned int tx_dpackets = 0;
2406         unsigned int tx_bpackets = 0;
2407         unsigned int tx_bytes = 0;
2408         dma_addr_t data_dma_addr;
2409         struct netdev_queue *txq;
2410         unsigned long lpar_rc;
2411         union sub_crq tx_crq;
2412         unsigned int offset;
2413         bool use_scrq_send_direct = false;
2414         int num_entries = 1;
2415         unsigned char *dst;
2416         int bufidx = 0;
2417         u8 proto = 0;
2418
2419         /* If a reset is in progress, drop the packet since
2420          * the scrqs may get torn down. Otherwise use the
2421          * rcu to ensure reset waits for us to complete.
2422          */
2423         rcu_read_lock();
2424         if (!adapter->tx_queues_active) {
2425                 dev_kfree_skb_any(skb);
2426
2427                 tx_send_failed++;
2428                 tx_dropped++;
2429                 ret = NETDEV_TX_OK;
2430                 goto out;
2431         }
2432
2433         tx_scrq = adapter->tx_scrq[queue_num];
2434         txq = netdev_get_tx_queue(netdev, queue_num);
2435         ind_bufp = &tx_scrq->ind_buf;
2436
2437         if (ibmvnic_xmit_workarounds(skb, netdev)) {
2438                 tx_dropped++;
2439                 tx_send_failed++;
2440                 ret = NETDEV_TX_OK;
2441                 lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
2442                 if (lpar_rc != H_SUCCESS)
2443                         goto tx_err;
2444                 goto out;
2445         }
2446
2447         if (skb_is_gso(skb))
2448                 tx_pool = &adapter->tso_pool[queue_num];
2449         else
2450                 tx_pool = &adapter->tx_pool[queue_num];
2451
2452         bufidx = tx_pool->free_map[tx_pool->consumer_index];
2453
2454         if (bufidx == IBMVNIC_INVALID_MAP) {
2455                 dev_kfree_skb_any(skb);
2456                 tx_send_failed++;
2457                 tx_dropped++;
2458                 ret = NETDEV_TX_OK;
2459                 lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
2460                 if (lpar_rc != H_SUCCESS)
2461                         goto tx_err;
2462                 goto out;
2463         }
2464
2465         tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP;
2466
2467         map_txpool_buf_to_ltb(tx_pool, bufidx, &ltb, &offset);
2468
2469         dst = ltb->buff + offset;
2470         memset(dst, 0, tx_pool->buf_size);
2471         data_dma_addr = ltb->addr + offset;
2472
2473         /* if we are going to send_subcrq_direct this then we need to
2474          * update the checksum before copying the data into ltb. Essentially
2475          * these packets force disable CSO so that we can guarantee that
2476          * FW does not need header info and we can send direct. Also, vnic
2477          * server must be able to xmit standard packets without header data
2478          */
2479         if (*hdrs == 0 && !skb_is_gso(skb) &&
2480             !ind_bufp->index && !netdev_xmit_more()) {
2481                 use_scrq_send_direct = true;
2482                 if (skb->ip_summed == CHECKSUM_PARTIAL &&
2483                     skb_checksum_help(skb))
2484                         use_scrq_send_direct = false;
2485         }
2486
2487         if (skb_shinfo(skb)->nr_frags) {
2488                 int cur, i;
2489
2490                 /* Copy the head */
2491                 skb_copy_from_linear_data(skb, dst, skb_headlen(skb));
2492                 cur = skb_headlen(skb);
2493
2494                 /* Copy the frags */
2495                 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2496                         const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2497
2498                         memcpy(dst + cur, skb_frag_address(frag),
2499                                skb_frag_size(frag));
2500                         cur += skb_frag_size(frag);
2501                 }
2502         } else {
2503                 skb_copy_from_linear_data(skb, dst, skb->len);
2504         }
2505
2506         tx_pool->consumer_index =
2507             (tx_pool->consumer_index + 1) % tx_pool->num_buffers;
2508
2509         tx_buff = &tx_pool->tx_buff[bufidx];
2510
2511         /* Sanity checks on our free map to make sure it points to an index
2512          * that is not being occupied by another skb. If skb memory is
2513          * not freed then we see congestion control kick in and halt tx.
2514          */
2515         if (unlikely(tx_buff->skb)) {
2516                 dev_warn_ratelimited(dev, "TX free map points to untracked skb (%s %d idx=%d)\n",
2517                                      skb_is_gso(skb) ? "tso_pool" : "tx_pool",
2518                                      queue_num, bufidx);
2519                 dev_kfree_skb_any(tx_buff->skb);
2520         }
2521
2522         tx_buff->skb = skb;
2523         tx_buff->index = bufidx;
2524         tx_buff->pool_index = queue_num;
2525
2526         memset(&tx_crq, 0, sizeof(tx_crq));
2527         tx_crq.v1.first = IBMVNIC_CRQ_CMD;
2528         tx_crq.v1.type = IBMVNIC_TX_DESC;
2529         tx_crq.v1.n_crq_elem = 1;
2530         tx_crq.v1.n_sge = 1;
2531         tx_crq.v1.flags1 = IBMVNIC_TX_COMP_NEEDED;
2532
2533         if (skb_is_gso(skb))
2534                 tx_crq.v1.correlator =
2535                         cpu_to_be32(bufidx | IBMVNIC_TSO_POOL_MASK);
2536         else
2537                 tx_crq.v1.correlator = cpu_to_be32(bufidx);
2538         tx_crq.v1.dma_reg = cpu_to_be16(ltb->map_id);
2539         tx_crq.v1.sge_len = cpu_to_be32(skb->len);
2540         tx_crq.v1.ioba = cpu_to_be64(data_dma_addr);
2541
2542         if (adapter->vlan_header_insertion && skb_vlan_tag_present(skb)) {
2543                 tx_crq.v1.flags2 |= IBMVNIC_TX_VLAN_INSERT;
2544                 tx_crq.v1.vlan_id = cpu_to_be16(skb->vlan_tci);
2545         }
2546
2547         if (skb->protocol == htons(ETH_P_IP)) {
2548                 tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4;
2549                 proto = ip_hdr(skb)->protocol;
2550         } else if (skb->protocol == htons(ETH_P_IPV6)) {
2551                 tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6;
2552                 proto = ipv6_hdr(skb)->nexthdr;
2553         }
2554
2555         if (proto == IPPROTO_TCP)
2556                 tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP;
2557         else if (proto == IPPROTO_UDP)
2558                 tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP;
2559
2560         if (skb->ip_summed == CHECKSUM_PARTIAL) {
2561                 tx_crq.v1.flags1 |= IBMVNIC_TX_CHKSUM_OFFLOAD;
2562                 hdrs += 2;
2563         }
2564         if (skb_is_gso(skb)) {
2565                 tx_crq.v1.flags1 |= IBMVNIC_TX_LSO;
2566                 tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
2567                 hdrs += 2;
2568         } else if (use_scrq_send_direct) {
2569                 /* See above comment, CSO disabled with direct xmit */
2570                 tx_crq.v1.flags1 &= ~(IBMVNIC_TX_CHKSUM_OFFLOAD);
2571                 ind_bufp->index = 1;
2572                 tx_buff->num_entries = 1;
2573                 netdev_tx_sent_queue(txq, skb->len);
2574                 ind_bufp->indir_arr[0] = tx_crq;
2575                 lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, false);
2576                 if (lpar_rc != H_SUCCESS)
2577                         goto tx_err;
2578
2579                 tx_dpackets++;
2580                 goto early_exit;
2581         }
2582
2583         if ((*hdrs >> 7) & 1)
2584                 build_hdr_descs_arr(skb, indir_arr, &num_entries, *hdrs);
2585
2586         tx_crq.v1.n_crq_elem = num_entries;
2587         tx_buff->num_entries = num_entries;
2588         /* flush buffer if current entry can not fit */
2589         if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) {
2590                 lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
2591                 if (lpar_rc != H_SUCCESS)
2592                         goto tx_flush_err;
2593         }
2594
2595         indir_arr[0] = tx_crq;
2596         memcpy(&ind_bufp->indir_arr[ind_bufp->index], &indir_arr[0],
2597                num_entries * sizeof(struct ibmvnic_generic_scrq));
2598
2599         ind_bufp->index += num_entries;
2600         if (__netdev_tx_sent_queue(txq, skb->len,
2601                                    netdev_xmit_more() &&
2602                                    ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) {
2603                 lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
2604                 if (lpar_rc != H_SUCCESS)
2605                         goto tx_err;
2606         }
2607
2608         tx_bpackets++;
2609
2610 early_exit:
2611         if (atomic_add_return(num_entries, &tx_scrq->used)
2612                                         >= adapter->req_tx_entries_per_subcrq) {
2613                 netdev_dbg(netdev, "Stopping queue %d\n", queue_num);
2614                 netif_stop_subqueue(netdev, queue_num);
2615         }
2616
2617         tx_bytes += skb->len;
2618         txq_trans_cond_update(txq);
2619         ret = NETDEV_TX_OK;
2620         goto out;
2621
2622 tx_flush_err:
2623         dev_kfree_skb_any(skb);
2624         tx_buff->skb = NULL;
2625         tx_pool->consumer_index = tx_pool->consumer_index == 0 ?
2626                                   tx_pool->num_buffers - 1 :
2627                                   tx_pool->consumer_index - 1;
2628         tx_dropped++;
2629 tx_err:
2630         if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER)
2631                 dev_err_ratelimited(dev, "tx: send failed\n");
2632
2633         if (lpar_rc == H_CLOSED || adapter->failover_pending) {
2634                 /* Disable TX and report carrier off if queue is closed
2635                  * or pending failover.
2636                  * Firmware guarantees that a signal will be sent to the
2637                  * driver, triggering a reset or some other action.
2638                  */
2639                 netif_tx_stop_all_queues(netdev);
2640                 netif_carrier_off(netdev);
2641         }
2642 out:
2643         rcu_read_unlock();
2644         netdev->stats.tx_dropped += tx_dropped;
2645         netdev->stats.tx_bytes += tx_bytes;
2646         netdev->stats.tx_packets += tx_bpackets + tx_dpackets;
2647         adapter->tx_send_failed += tx_send_failed;
2648         adapter->tx_map_failed += tx_map_failed;
2649         adapter->tx_stats_buffers[queue_num].batched_packets += tx_bpackets;
2650         adapter->tx_stats_buffers[queue_num].direct_packets += tx_dpackets;
2651         adapter->tx_stats_buffers[queue_num].bytes += tx_bytes;
2652         adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped;
2653
2654         return ret;
2655 }
2656
2657 static void ibmvnic_set_multi(struct net_device *netdev)
2658 {
2659         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
2660         struct netdev_hw_addr *ha;
2661         union ibmvnic_crq crq;
2662
2663         memset(&crq, 0, sizeof(crq));
2664         crq.request_capability.first = IBMVNIC_CRQ_CMD;
2665         crq.request_capability.cmd = REQUEST_CAPABILITY;
2666
2667         if (netdev->flags & IFF_PROMISC) {
2668                 if (!adapter->promisc_supported)
2669                         return;
2670         } else {
2671                 if (netdev->flags & IFF_ALLMULTI) {
2672                         /* Accept all multicast */
2673                         memset(&crq, 0, sizeof(crq));
2674                         crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD;
2675                         crq.multicast_ctrl.cmd = MULTICAST_CTRL;
2676                         crq.multicast_ctrl.flags = IBMVNIC_ENABLE_ALL;
2677                         ibmvnic_send_crq(adapter, &crq);
2678                 } else if (netdev_mc_empty(netdev)) {
2679                         /* Reject all multicast */
2680                         memset(&crq, 0, sizeof(crq));
2681                         crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD;
2682                         crq.multicast_ctrl.cmd = MULTICAST_CTRL;
2683                         crq.multicast_ctrl.flags = IBMVNIC_DISABLE_ALL;
2684                         ibmvnic_send_crq(adapter, &crq);
2685                 } else {
2686                         /* Accept one or more multicast(s) */
2687                         netdev_for_each_mc_addr(ha, netdev) {
2688                                 memset(&crq, 0, sizeof(crq));
2689                                 crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD;
2690                                 crq.multicast_ctrl.cmd = MULTICAST_CTRL;
2691                                 crq.multicast_ctrl.flags = IBMVNIC_ENABLE_MC;
2692                                 ether_addr_copy(&crq.multicast_ctrl.mac_addr[0],
2693                                                 ha->addr);
2694                                 ibmvnic_send_crq(adapter, &crq);
2695                         }
2696                 }
2697         }
2698 }
2699
2700 static int __ibmvnic_set_mac(struct net_device *netdev, u8 *dev_addr)
2701 {
2702         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
2703         union ibmvnic_crq crq;
2704         int rc;
2705
2706         if (!is_valid_ether_addr(dev_addr)) {
2707                 rc = -EADDRNOTAVAIL;
2708                 goto err;
2709         }
2710
2711         memset(&crq, 0, sizeof(crq));
2712         crq.change_mac_addr.first = IBMVNIC_CRQ_CMD;
2713         crq.change_mac_addr.cmd = CHANGE_MAC_ADDR;
2714         ether_addr_copy(&crq.change_mac_addr.mac_addr[0], dev_addr);
2715
2716         mutex_lock(&adapter->fw_lock);
2717         adapter->fw_done_rc = 0;
2718         reinit_completion(&adapter->fw_done);
2719
2720         rc = ibmvnic_send_crq(adapter, &crq);
2721         if (rc) {
2722                 rc = -EIO;
2723                 mutex_unlock(&adapter->fw_lock);
2724                 goto err;
2725         }
2726
2727         rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
2728         /* netdev->dev_addr is changed in handle_change_mac_rsp function */
2729         if (rc || adapter->fw_done_rc) {
2730                 rc = -EIO;
2731                 mutex_unlock(&adapter->fw_lock);
2732                 goto err;
2733         }
2734         mutex_unlock(&adapter->fw_lock);
2735         return 0;
2736 err:
2737         ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
2738         return rc;
2739 }
2740
2741 static int ibmvnic_set_mac(struct net_device *netdev, void *p)
2742 {
2743         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
2744         struct sockaddr *addr = p;
2745         int rc;
2746
2747         rc = 0;
2748         if (!is_valid_ether_addr(addr->sa_data))
2749                 return -EADDRNOTAVAIL;
2750
2751         ether_addr_copy(adapter->mac_addr, addr->sa_data);
2752         if (adapter->state != VNIC_PROBED)
2753                 rc = __ibmvnic_set_mac(netdev, addr->sa_data);
2754
2755         return rc;
2756 }
2757
2758 static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason)
2759 {
2760         switch (reason) {
2761         case VNIC_RESET_FAILOVER:
2762                 return "FAILOVER";
2763         case VNIC_RESET_MOBILITY:
2764                 return "MOBILITY";
2765         case VNIC_RESET_FATAL:
2766                 return "FATAL";
2767         case VNIC_RESET_NON_FATAL:
2768                 return "NON_FATAL";
2769         case VNIC_RESET_TIMEOUT:
2770                 return "TIMEOUT";
2771         case VNIC_RESET_CHANGE_PARAM:
2772                 return "CHANGE_PARAM";
2773         case VNIC_RESET_PASSIVE_INIT:
2774                 return "PASSIVE_INIT";
2775         }
2776         return "UNKNOWN";
2777 }
2778
2779 /*
2780  * Initialize the init_done completion and return code values. We
2781  * can get a transport event just after registering the CRQ and the
2782  * tasklet will use this to communicate the transport event. To ensure
2783  * we don't miss the notification/error, initialize these _before_
2784  * regisering the CRQ.
2785  */
2786 static inline void reinit_init_done(struct ibmvnic_adapter *adapter)
2787 {
2788         reinit_completion(&adapter->init_done);
2789         adapter->init_done_rc = 0;
2790 }
2791
2792 /*
2793  * do_reset returns zero if we are able to keep processing reset events, or
2794  * non-zero if we hit a fatal error and must halt.
2795  */
2796 static int do_reset(struct ibmvnic_adapter *adapter,
2797                     struct ibmvnic_rwi *rwi, u32 reset_state)
2798 {
2799         struct net_device *netdev = adapter->netdev;
2800         u64 old_num_rx_queues, old_num_tx_queues;
2801         u64 old_num_rx_slots, old_num_tx_slots;
2802         int rc;
2803
2804         netdev_dbg(adapter->netdev,
2805                    "[S:%s FOP:%d] Reset reason: %s, reset_state: %s\n",
2806                    adapter_state_to_string(adapter->state),
2807                    adapter->failover_pending,
2808                    reset_reason_to_string(rwi->reset_reason),
2809                    adapter_state_to_string(reset_state));
2810
2811         adapter->reset_reason = rwi->reset_reason;
2812         /* requestor of VNIC_RESET_CHANGE_PARAM already has the rtnl lock */
2813         if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM))
2814                 rtnl_lock();
2815
2816         /* Now that we have the rtnl lock, clear any pending failover.
2817          * This will ensure ibmvnic_open() has either completed or will
2818          * block until failover is complete.
2819          */
2820         if (rwi->reset_reason == VNIC_RESET_FAILOVER)
2821                 adapter->failover_pending = false;
2822
2823         /* read the state and check (again) after getting rtnl */
2824         reset_state = adapter->state;
2825
2826         if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) {
2827                 rc = -EBUSY;
2828                 goto out;
2829         }
2830
2831         netif_carrier_off(netdev);
2832
2833         old_num_rx_queues = adapter->req_rx_queues;
2834         old_num_tx_queues = adapter->req_tx_queues;
2835         old_num_rx_slots = adapter->req_rx_add_entries_per_subcrq;
2836         old_num_tx_slots = adapter->req_tx_entries_per_subcrq;
2837
2838         ibmvnic_cleanup(netdev);
2839
2840         if (reset_state == VNIC_OPEN &&
2841             adapter->reset_reason != VNIC_RESET_MOBILITY &&
2842             adapter->reset_reason != VNIC_RESET_FAILOVER) {
2843                 if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
2844                         rc = __ibmvnic_close(netdev);
2845                         if (rc)
2846                                 goto out;
2847                 } else {
2848                         adapter->state = VNIC_CLOSING;
2849
2850                         /* Release the RTNL lock before link state change and
2851                          * re-acquire after the link state change to allow
2852                          * linkwatch_event to grab the RTNL lock and run during
2853                          * a reset.
2854                          */
2855                         rtnl_unlock();
2856                         rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
2857                         rtnl_lock();
2858                         if (rc)
2859                                 goto out;
2860
2861                         if (adapter->state == VNIC_OPEN) {
2862                                 /* When we dropped rtnl, ibmvnic_open() got
2863                                  * it and noticed that we are resetting and
2864                                  * set the adapter state to OPEN. Update our
2865                                  * new "target" state, and resume the reset
2866                                  * from VNIC_CLOSING state.
2867                                  */
2868                                 netdev_dbg(netdev,
2869                                            "Open changed state from %s, updating.\n",
2870                                            adapter_state_to_string(reset_state));
2871                                 reset_state = VNIC_OPEN;
2872                                 adapter->state = VNIC_CLOSING;
2873                         }
2874
2875                         if (adapter->state != VNIC_CLOSING) {
2876                                 /* If someone else changed the adapter state
2877                                  * when we dropped the rtnl, fail the reset
2878                                  */
2879                                 rc = -EAGAIN;
2880                                 goto out;
2881                         }
2882                         adapter->state = VNIC_CLOSED;
2883                 }
2884         }
2885
2886         if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
2887                 release_resources(adapter);
2888                 release_sub_crqs(adapter, 1);
2889                 release_crq_queue(adapter);
2890         }
2891
2892         if (adapter->reset_reason != VNIC_RESET_NON_FATAL) {
2893                 /* remove the closed state so when we call open it appears
2894                  * we are coming from the probed state.
2895                  */
2896                 adapter->state = VNIC_PROBED;
2897
2898                 reinit_init_done(adapter);
2899
2900                 if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
2901                         rc = init_crq_queue(adapter);
2902                 } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
2903                         rc = ibmvnic_reenable_crq_queue(adapter);
2904                         release_sub_crqs(adapter, 1);
2905                 } else {
2906                         rc = ibmvnic_reset_crq(adapter);
2907                         if (rc == H_CLOSED || rc == H_SUCCESS) {
2908                                 rc = vio_enable_interrupts(adapter->vdev);
2909                                 if (rc)
2910                                         netdev_err(adapter->netdev,
2911                                                    "Reset failed to enable interrupts. rc=%d\n",
2912                                                    rc);
2913                         }
2914                 }
2915
2916                 if (rc) {
2917                         netdev_err(adapter->netdev,
2918                                    "Reset couldn't initialize crq. rc=%d\n", rc);
2919                         goto out;
2920                 }
2921
2922                 rc = ibmvnic_reset_init(adapter, true);
2923                 if (rc)
2924                         goto out;
2925
2926                 /* If the adapter was in PROBE or DOWN state prior to the reset,
2927                  * exit here.
2928                  */
2929                 if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN) {
2930                         rc = 0;
2931                         goto out;
2932                 }
2933
2934                 rc = ibmvnic_login(netdev);
2935                 if (rc)
2936                         goto out;
2937
2938                 if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
2939                         rc = init_resources(adapter);
2940                         if (rc)
2941                                 goto out;
2942                 } else if (adapter->req_rx_queues != old_num_rx_queues ||
2943                     adapter->req_tx_queues != old_num_tx_queues ||
2944                     adapter->req_rx_add_entries_per_subcrq !=
2945                     old_num_rx_slots ||
2946                     adapter->req_tx_entries_per_subcrq !=
2947                     old_num_tx_slots ||
2948                     !adapter->rx_pool ||
2949                     !adapter->tso_pool ||
2950                     !adapter->tx_pool) {
2951                         release_napi(adapter);
2952                         release_vpd_data(adapter);
2953
2954                         rc = init_resources(adapter);
2955                         if (rc)
2956                                 goto out;
2957
2958                 } else {
2959                         rc = init_tx_pools(netdev);
2960                         if (rc) {
2961                                 netdev_dbg(netdev,
2962                                            "init tx pools failed (%d)\n",
2963                                            rc);
2964                                 goto out;
2965                         }
2966
2967                         rc = init_rx_pools(netdev);
2968                         if (rc) {
2969                                 netdev_dbg(netdev,
2970                                            "init rx pools failed (%d)\n",
2971                                            rc);
2972                                 goto out;
2973                         }
2974                 }
2975                 ibmvnic_disable_irqs(adapter);
2976         }
2977         adapter->state = VNIC_CLOSED;
2978
2979         if (reset_state == VNIC_CLOSED) {
2980                 rc = 0;
2981                 goto out;
2982         }
2983
2984         rc = __ibmvnic_open(netdev);
2985         if (rc) {
2986                 rc = IBMVNIC_OPEN_FAILED;
2987                 goto out;
2988         }
2989
2990         /* refresh device's multicast list */
2991         ibmvnic_set_multi(netdev);
2992
2993         if (adapter->reset_reason == VNIC_RESET_FAILOVER ||
2994             adapter->reset_reason == VNIC_RESET_MOBILITY)
2995                 __netdev_notify_peers(netdev);
2996
2997         rc = 0;
2998
2999 out:
3000         /* restore the adapter state if reset failed */
3001         if (rc)
3002                 adapter->state = reset_state;
3003         /* requestor of VNIC_RESET_CHANGE_PARAM should still hold the rtnl lock */
3004         if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM))
3005                 rtnl_unlock();
3006
3007         netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Reset done, rc %d\n",
3008                    adapter_state_to_string(adapter->state),
3009                    adapter->failover_pending, rc);
3010         return rc;
3011 }
3012
3013 static int do_hard_reset(struct ibmvnic_adapter *adapter,
3014                          struct ibmvnic_rwi *rwi, u32 reset_state)
3015 {
3016         struct net_device *netdev = adapter->netdev;
3017         int rc;
3018
3019         netdev_dbg(adapter->netdev, "Hard resetting driver (%s)\n",
3020                    reset_reason_to_string(rwi->reset_reason));
3021
3022         /* read the state and check (again) after getting rtnl */
3023         reset_state = adapter->state;
3024
3025         if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) {
3026                 rc = -EBUSY;
3027                 goto out;
3028         }
3029
3030         netif_carrier_off(netdev);
3031         adapter->reset_reason = rwi->reset_reason;
3032
3033         ibmvnic_cleanup(netdev);
3034         release_resources(adapter);
3035         release_sub_crqs(adapter, 0);
3036         release_crq_queue(adapter);
3037
3038         /* remove the closed state so when we call open it appears
3039          * we are coming from the probed state.
3040          */
3041         adapter->state = VNIC_PROBED;
3042
3043         reinit_init_done(adapter);
3044
3045         rc = init_crq_queue(adapter);
3046         if (rc) {
3047                 netdev_err(adapter->netdev,
3048                            "Couldn't initialize crq. rc=%d\n", rc);
3049                 goto out;
3050         }
3051
3052         rc = ibmvnic_reset_init(adapter, false);
3053         if (rc)
3054                 goto out;
3055
3056         /* If the adapter was in PROBE or DOWN state prior to the reset,
3057          * exit here.
3058          */
3059         if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN)
3060                 goto out;
3061
3062         rc = ibmvnic_login(netdev);
3063         if (rc)
3064                 goto out;
3065
3066         rc = init_resources(adapter);
3067         if (rc)
3068                 goto out;
3069
3070         ibmvnic_disable_irqs(adapter);
3071         adapter->state = VNIC_CLOSED;
3072
3073         if (reset_state == VNIC_CLOSED)
3074                 goto out;
3075
3076         rc = __ibmvnic_open(netdev);
3077         if (rc) {
3078                 rc = IBMVNIC_OPEN_FAILED;
3079                 goto out;
3080         }
3081
3082         __netdev_notify_peers(netdev);
3083 out:
3084         /* restore adapter state if reset failed */
3085         if (rc)
3086                 adapter->state = reset_state;
3087         netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Hard reset done, rc %d\n",
3088                    adapter_state_to_string(adapter->state),
3089                    adapter->failover_pending, rc);
3090         return rc;
3091 }
3092
3093 static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter)
3094 {
3095         struct ibmvnic_rwi *rwi;
3096         unsigned long flags;
3097
3098         spin_lock_irqsave(&adapter->rwi_lock, flags);
3099
3100         if (!list_empty(&adapter->rwi_list)) {
3101                 rwi = list_first_entry(&adapter->rwi_list, struct ibmvnic_rwi,
3102                                        list);
3103                 list_del(&rwi->list);
3104         } else {
3105                 rwi = NULL;
3106         }
3107
3108         spin_unlock_irqrestore(&adapter->rwi_lock, flags);
3109         return rwi;
3110 }
3111
3112 /**
3113  * do_passive_init - complete probing when partner device is detected.
3114  * @adapter: ibmvnic_adapter struct
3115  *
3116  * If the ibmvnic device does not have a partner device to communicate with at boot
3117  * and that partner device comes online at a later time, this function is called
3118  * to complete the initialization process of ibmvnic device.
3119  * Caller is expected to hold rtnl_lock().
3120  *
3121  * Returns non-zero if sub-CRQs are not initialized properly leaving the device
3122  * in the down state.
3123  * Returns 0 upon success and the device is in PROBED state.
3124  */
3125
3126 static int do_passive_init(struct ibmvnic_adapter *adapter)
3127 {
3128         unsigned long timeout = msecs_to_jiffies(30000);
3129         struct net_device *netdev = adapter->netdev;
3130         struct device *dev = &adapter->vdev->dev;
3131         int rc;
3132
3133         netdev_dbg(netdev, "Partner device found, probing.\n");
3134
3135         adapter->state = VNIC_PROBING;
3136         reinit_completion(&adapter->init_done);
3137         adapter->init_done_rc = 0;
3138         adapter->crq.active = true;
3139
3140         rc = send_crq_init_complete(adapter);
3141         if (rc)
3142                 goto out;
3143
3144         rc = send_version_xchg(adapter);
3145         if (rc)
3146                 netdev_dbg(adapter->netdev, "send_version_xchg failed, rc=%d\n", rc);
3147
3148         if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
3149                 dev_err(dev, "Initialization sequence timed out\n");
3150                 rc = -ETIMEDOUT;
3151                 goto out;
3152         }
3153
3154         rc = init_sub_crqs(adapter);
3155         if (rc) {
3156                 dev_err(dev, "Initialization of sub crqs failed, rc=%d\n", rc);
3157                 goto out;
3158         }
3159
3160         rc = init_sub_crq_irqs(adapter);
3161         if (rc) {
3162                 dev_err(dev, "Failed to initialize sub crq irqs\n, rc=%d", rc);
3163                 goto init_failed;
3164         }
3165
3166         netdev->mtu = adapter->req_mtu - ETH_HLEN;
3167         netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
3168         netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
3169
3170         adapter->state = VNIC_PROBED;
3171         netdev_dbg(netdev, "Probed successfully. Waiting for signal from partner device.\n");
3172
3173         return 0;
3174
3175 init_failed:
3176         release_sub_crqs(adapter, 1);
3177 out:
3178         adapter->state = VNIC_DOWN;
3179         return rc;
3180 }
3181
3182 static void __ibmvnic_reset(struct work_struct *work)
3183 {
3184         struct ibmvnic_adapter *adapter;
3185         unsigned int timeout = 5000;
3186         struct ibmvnic_rwi *tmprwi;
3187         bool saved_state = false;
3188         struct ibmvnic_rwi *rwi;
3189         unsigned long flags;
3190         struct device *dev;
3191         bool need_reset;
3192         int num_fails = 0;
3193         u32 reset_state;
3194         int rc = 0;
3195
3196         adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
3197                 dev = &adapter->vdev->dev;
3198
3199         /* Wait for ibmvnic_probe() to complete. If probe is taking too long
3200          * or if another reset is in progress, defer work for now. If probe
3201          * eventually fails it will flush and terminate our work.
3202          *
3203          * Three possibilities here:
3204          * 1. Adpater being removed  - just return
3205          * 2. Timed out on probe or another reset in progress - delay the work
3206          * 3. Completed probe - perform any resets in queue
3207          */
3208         if (adapter->state == VNIC_PROBING &&
3209             !wait_for_completion_timeout(&adapter->probe_done, timeout)) {
3210                 dev_err(dev, "Reset thread timed out on probe");
3211                 queue_delayed_work(system_long_wq,
3212                                    &adapter->ibmvnic_delayed_reset,
3213                                    IBMVNIC_RESET_DELAY);
3214                 return;
3215         }
3216
3217         /* adapter is done with probe (i.e state is never VNIC_PROBING now) */
3218         if (adapter->state == VNIC_REMOVING)
3219                 return;
3220
3221         /* ->rwi_list is stable now (no one else is removing entries) */
3222
3223         /* ibmvnic_probe() may have purged the reset queue after we were
3224          * scheduled to process a reset so there maybe no resets to process.
3225          * Before setting the ->resetting bit though, we have to make sure
3226          * that there is infact a reset to process. Otherwise we may race
3227          * with ibmvnic_open() and end up leaving the vnic down:
3228          *
3229          *      __ibmvnic_reset()           ibmvnic_open()
3230          *      -----------------           --------------
3231          *
3232          *  set ->resetting bit
3233          *                              find ->resetting bit is set
3234          *                              set ->state to IBMVNIC_OPEN (i.e
3235          *                              assume reset will open device)
3236          *                              return
3237          *  find reset queue empty
3238          *  return
3239          *
3240          *      Neither performed vnic login/open and vnic stays down
3241          *
3242          * If we hold the lock and conditionally set the bit, either we
3243          * or ibmvnic_open() will complete the open.
3244          */
3245         need_reset = false;
3246         spin_lock(&adapter->rwi_lock);
3247         if (!list_empty(&adapter->rwi_list)) {
3248                 if (test_and_set_bit_lock(0, &adapter->resetting)) {
3249                         queue_delayed_work(system_long_wq,
3250                                            &adapter->ibmvnic_delayed_reset,
3251                                            IBMVNIC_RESET_DELAY);
3252                 } else {
3253                         need_reset = true;
3254                 }
3255         }
3256         spin_unlock(&adapter->rwi_lock);
3257
3258         if (!need_reset)
3259                 return;
3260
3261         rwi = get_next_rwi(adapter);
3262         while (rwi) {
3263                 spin_lock_irqsave(&adapter->state_lock, flags);
3264
3265                 if (adapter->state == VNIC_REMOVING ||
3266                     adapter->state == VNIC_REMOVED) {
3267                         spin_unlock_irqrestore(&adapter->state_lock, flags);
3268                         kfree(rwi);
3269                         rc = EBUSY;
3270                         break;
3271                 }
3272
3273                 if (!saved_state) {
3274                         reset_state = adapter->state;
3275                         saved_state = true;
3276                 }
3277                 spin_unlock_irqrestore(&adapter->state_lock, flags);
3278
3279                 if (rwi->reset_reason == VNIC_RESET_PASSIVE_INIT) {
3280                         rtnl_lock();
3281                         rc = do_passive_init(adapter);
3282                         rtnl_unlock();
3283                         if (!rc)
3284                                 netif_carrier_on(adapter->netdev);
3285                 } else if (adapter->force_reset_recovery) {
3286                         /* Since we are doing a hard reset now, clear the
3287                          * failover_pending flag so we don't ignore any
3288                          * future MOBILITY or other resets.
3289                          */
3290                         adapter->failover_pending = false;
3291
3292                         /* Transport event occurred during previous reset */
3293                         if (adapter->wait_for_reset) {
3294                                 /* Previous was CHANGE_PARAM; caller locked */
3295                                 adapter->force_reset_recovery = false;
3296                                 rc = do_hard_reset(adapter, rwi, reset_state);
3297                         } else {
3298                                 rtnl_lock();
3299                                 adapter->force_reset_recovery = false;
3300                                 rc = do_hard_reset(adapter, rwi, reset_state);
3301                                 rtnl_unlock();
3302                         }
3303                         if (rc)
3304                                 num_fails++;
3305                         else
3306                                 num_fails = 0;
3307
3308                         /* If auto-priority-failover is enabled we can get
3309                          * back to back failovers during resets, resulting
3310                          * in at least two failed resets (from high-priority
3311                          * backing device to low-priority one and then back)
3312                          * If resets continue to fail beyond that, give the
3313                          * adapter some time to settle down before retrying.
3314                          */
3315                         if (num_fails >= 3) {
3316                                 netdev_dbg(adapter->netdev,
3317                                            "[S:%s] Hard reset failed %d times, waiting 60 secs\n",
3318                                            adapter_state_to_string(adapter->state),
3319                                            num_fails);
3320                                 set_current_state(TASK_UNINTERRUPTIBLE);
3321                                 schedule_timeout(60 * HZ);
3322                         }
3323                 } else {
3324                         rc = do_reset(adapter, rwi, reset_state);
3325                 }
3326                 tmprwi = rwi;
3327                 adapter->last_reset_time = jiffies;
3328
3329                 if (rc)
3330                         netdev_dbg(adapter->netdev, "Reset failed, rc=%d\n", rc);
3331
3332                 rwi = get_next_rwi(adapter);
3333
3334                 /*
3335                  * If there are no resets queued and the previous reset failed,
3336                  * the adapter would be in an undefined state. So retry the
3337                  * previous reset as a hard reset.
3338                  *
3339                  * Else, free the previous rwi and, if there is another reset
3340                  * queued, process the new reset even if previous reset failed
3341                  * (the previous reset could have failed because of a fail
3342                  * over for instance, so process the fail over).
3343                  */
3344                 if (!rwi && rc)
3345                         rwi = tmprwi;
3346                 else
3347                         kfree(tmprwi);
3348
3349                 if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER ||
3350                             rwi->reset_reason == VNIC_RESET_MOBILITY || rc))
3351                         adapter->force_reset_recovery = true;
3352         }
3353
3354         if (adapter->wait_for_reset) {
3355                 adapter->reset_done_rc = rc;
3356                 complete(&adapter->reset_done);
3357         }
3358
3359         clear_bit_unlock(0, &adapter->resetting);
3360
3361         netdev_dbg(adapter->netdev,
3362                    "[S:%s FRR:%d WFR:%d] Done processing resets\n",
3363                    adapter_state_to_string(adapter->state),
3364                    adapter->force_reset_recovery,
3365                    adapter->wait_for_reset);
3366 }
3367
3368 static void __ibmvnic_delayed_reset(struct work_struct *work)
3369 {
3370         struct ibmvnic_adapter *adapter;
3371
3372         adapter = container_of(work, struct ibmvnic_adapter,
3373                                ibmvnic_delayed_reset.work);
3374         __ibmvnic_reset(&adapter->ibmvnic_reset);
3375 }
3376
3377 static void flush_reset_queue(struct ibmvnic_adapter *adapter)
3378 {
3379         struct list_head *entry, *tmp_entry;
3380
3381         if (!list_empty(&adapter->rwi_list)) {
3382                 list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) {
3383                         list_del(entry);
3384                         kfree(list_entry(entry, struct ibmvnic_rwi, list));
3385                 }
3386         }
3387 }
3388
3389 static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
3390                          enum ibmvnic_reset_reason reason)
3391 {
3392         struct net_device *netdev = adapter->netdev;
3393         struct ibmvnic_rwi *rwi, *tmp;
3394         unsigned long flags;
3395         int ret;
3396
3397         spin_lock_irqsave(&adapter->rwi_lock, flags);
3398
3399         /* If failover is pending don't schedule any other reset.
3400          * Instead let the failover complete. If there is already a
3401          * a failover reset scheduled, we will detect and drop the
3402          * duplicate reset when walking the ->rwi_list below.
3403          */
3404         if (adapter->state == VNIC_REMOVING ||
3405             adapter->state == VNIC_REMOVED ||
3406             (adapter->failover_pending && reason != VNIC_RESET_FAILOVER)) {
3407                 ret = EBUSY;
3408                 netdev_dbg(netdev, "Adapter removing or pending failover, skipping reset\n");
3409                 goto err;
3410         }
3411
3412         list_for_each_entry(tmp, &adapter->rwi_list, list) {
3413                 if (tmp->reset_reason == reason) {
3414                         netdev_dbg(netdev, "Skipping matching reset, reason=%s\n",
3415                                    reset_reason_to_string(reason));
3416                         ret = EBUSY;
3417                         goto err;
3418                 }
3419         }
3420
3421         rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC);
3422         if (!rwi) {
3423                 ret = ENOMEM;
3424                 goto err;
3425         }
3426         /* if we just received a transport event,
3427          * flush reset queue and process this reset
3428          */
3429         if (adapter->force_reset_recovery)
3430                 flush_reset_queue(adapter);
3431
3432         rwi->reset_reason = reason;
3433         list_add_tail(&rwi->list, &adapter->rwi_list);
3434         netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n",
3435                    reset_reason_to_string(reason));
3436         queue_work(system_long_wq, &adapter->ibmvnic_reset);
3437
3438         ret = 0;
3439 err:
3440         /* ibmvnic_close() below can block, so drop the lock first */
3441         spin_unlock_irqrestore(&adapter->rwi_lock, flags);
3442
3443         if (ret == ENOMEM)
3444                 ibmvnic_close(netdev);
3445
3446         return -ret;
3447 }
3448
3449 static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue)
3450 {
3451         struct ibmvnic_adapter *adapter = netdev_priv(dev);
3452
3453         if (test_bit(0, &adapter->resetting)) {
3454                 netdev_err(adapter->netdev,
3455                            "Adapter is resetting, skip timeout reset\n");
3456                 return;
3457         }
3458         /* No queuing up reset until at least 5 seconds (default watchdog val)
3459          * after last reset
3460          */
3461         if (time_before(jiffies, (adapter->last_reset_time + dev->watchdog_timeo))) {
3462                 netdev_dbg(dev, "Not yet time to tx timeout.\n");
3463                 return;
3464         }
3465         ibmvnic_reset(adapter, VNIC_RESET_TIMEOUT);
3466 }
3467
3468 static void remove_buff_from_pool(struct ibmvnic_adapter *adapter,
3469                                   struct ibmvnic_rx_buff *rx_buff)
3470 {
3471         struct ibmvnic_rx_pool *pool = &adapter->rx_pool[rx_buff->pool_index];
3472
3473         rx_buff->skb = NULL;
3474
3475         pool->free_map[pool->next_alloc] = (int)(rx_buff - pool->rx_buff);
3476         pool->next_alloc = (pool->next_alloc + 1) % pool->size;
3477
3478         atomic_dec(&pool->available);
3479 }
3480
/* NAPI poll handler for one rx sub-CRQ.
 *
 * Consumes up to @budget rx completions from the sub-CRQ that corresponds
 * to @napi, copies each frame into its skb and passes it up via
 * napi_gro_receive(). Completions carrying an error code, or whose buffer
 * has no skb, are recycled without being counted against the budget.
 *
 * Returns the number of frames delivered to the stack.
 */
static int ibmvnic_poll(struct napi_struct *napi, int budget)
{
	struct ibmvnic_sub_crq_queue *rx_scrq;
	struct ibmvnic_adapter *adapter;
	struct net_device *netdev;
	int frames_processed;
	int scrq_num;

	netdev = napi->dev;
	adapter = netdev_priv(netdev);
	/* queue number is this napi's position within adapter->napi[] */
	scrq_num = (int)(napi - adapter->napi);
	frames_processed = 0;
	rx_scrq = adapter->rx_scrq[scrq_num];

restart_poll:
	while (frames_processed < budget) {
		struct sk_buff *skb;
		struct ibmvnic_rx_buff *rx_buff;
		union sub_crq *next;
		u32 length;
		u16 offset;
		u8 flags = 0;

		/* A reset (other than NON_FATAL) is in progress: stop
		 * polling, re-enable the queue irq and complete NAPI.
		 */
		if (unlikely(test_bit(0, &adapter->resetting) &&
			     adapter->reset_reason != VNIC_RESET_NON_FATAL)) {
			enable_scrq_irq(adapter, rx_scrq);
			napi_complete_done(napi, frames_processed);
			return frames_processed;
		}

		if (!pending_scrq(adapter, rx_scrq))
			break;
		next = ibmvnic_next_scrq(adapter, rx_scrq);
		/* the correlator carries the rx_buff pointer back to us */
		rx_buff = (struct ibmvnic_rx_buff *)
			  be64_to_cpu(next->rx_comp.correlator);
		/* do error checking */
		if (next->rx_comp.rc) {
			netdev_dbg(netdev, "rx buffer returned with rc %x\n",
				   be16_to_cpu(next->rx_comp.rc));
			/* free the entry */
			next->rx_comp.first = 0;
			dev_kfree_skb_any(rx_buff->skb);
			remove_buff_from_pool(adapter, rx_buff);
			continue;
		} else if (!rx_buff->skb) {
			/* free the entry */
			next->rx_comp.first = 0;
			remove_buff_from_pool(adapter, rx_buff);
			continue;
		}

		length = be32_to_cpu(next->rx_comp.len);
		offset = be16_to_cpu(next->rx_comp.off_frame_data);
		flags = next->rx_comp.flags;
		skb = rx_buff->skb;
		/* load long_term_buff before copying to skb */
		dma_rmb();
		skb_copy_to_linear_data(skb, rx_buff->data + offset,
					length);

		/* VLAN Header has been stripped by the system firmware and
		 * needs to be inserted by the driver
		 */
		if (adapter->rx_vlan_header_insertion &&
		    (flags & IBMVNIC_VLAN_STRIPPED))
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
					       ntohs(next->rx_comp.vlan_tci));

		/* free the entry */
		next->rx_comp.first = 0;
		remove_buff_from_pool(adapter, rx_buff);

		skb_put(skb, length);
		skb->protocol = eth_type_trans(skb, netdev);
		skb_record_rx_queue(skb, scrq_num);

		/* only skip software checksumming when both flags are set */
		if (flags & IBMVNIC_IP_CHKSUM_GOOD &&
		    flags & IBMVNIC_TCP_UDP_CHKSUM_GOOD) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
		}

		/* sample skb->len before the skb is handed off */
		length = skb->len;
		napi_gro_receive(napi, skb); /* send it up */
		netdev->stats.rx_packets++;
		netdev->stats.rx_bytes += length;
		adapter->rx_stats_buffers[scrq_num].packets++;
		adapter->rx_stats_buffers[scrq_num].bytes += length;
		frames_processed++;
	}

	/* Refill once fewer than half of the requested rx buffers remain
	 * available, unless the device is closing.
	 */
	if (adapter->state != VNIC_CLOSING &&
	    (atomic_read(&adapter->rx_pool[scrq_num].available) <
	      adapter->req_rx_add_entries_per_subcrq / 2))
		replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]);
	if (frames_processed < budget) {
		if (napi_complete_done(napi, frames_processed)) {
			enable_scrq_irq(adapter, rx_scrq);
			/* Work may have arrived between the last check and
			 * re-enabling the irq; if so, reschedule and poll
			 * again with the irq disabled.
			 */
			if (pending_scrq(adapter, rx_scrq)) {
				if (napi_schedule(napi)) {
					disable_scrq_irq(adapter, rx_scrq);
					goto restart_poll;
				}
			}
		}
	}
	return frames_processed;
}
3588
3589 static int wait_for_reset(struct ibmvnic_adapter *adapter)
3590 {
3591         int rc, ret;
3592
3593         adapter->fallback.mtu = adapter->req_mtu;
3594         adapter->fallback.rx_queues = adapter->req_rx_queues;
3595         adapter->fallback.tx_queues = adapter->req_tx_queues;
3596         adapter->fallback.rx_entries = adapter->req_rx_add_entries_per_subcrq;
3597         adapter->fallback.tx_entries = adapter->req_tx_entries_per_subcrq;
3598
3599         reinit_completion(&adapter->reset_done);
3600         adapter->wait_for_reset = true;
3601         rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
3602
3603         if (rc) {
3604                 ret = rc;
3605                 goto out;
3606         }
3607         rc = ibmvnic_wait_for_completion(adapter, &adapter->reset_done, 60000);
3608         if (rc) {
3609                 ret = -ENODEV;
3610                 goto out;
3611         }
3612
3613         ret = 0;
3614         if (adapter->reset_done_rc) {
3615                 ret = -EIO;
3616                 adapter->desired.mtu = adapter->fallback.mtu;
3617                 adapter->desired.rx_queues = adapter->fallback.rx_queues;
3618                 adapter->desired.tx_queues = adapter->fallback.tx_queues;
3619                 adapter->desired.rx_entries = adapter->fallback.rx_entries;
3620                 adapter->desired.tx_entries = adapter->fallback.tx_entries;
3621
3622                 reinit_completion(&adapter->reset_done);
3623                 adapter->wait_for_reset = true;
3624                 rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
3625                 if (rc) {
3626                         ret = rc;
3627                         goto out;
3628                 }
3629                 rc = ibmvnic_wait_for_completion(adapter, &adapter->reset_done,
3630                                                  60000);
3631                 if (rc) {
3632                         ret = -ENODEV;
3633                         goto out;
3634                 }
3635         }
3636 out:
3637         adapter->wait_for_reset = false;
3638
3639         return ret;
3640 }
3641
3642 static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu)
3643 {
3644         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3645
3646         adapter->desired.mtu = new_mtu + ETH_HLEN;
3647
3648         return wait_for_reset(adapter);
3649 }
3650
3651 static netdev_features_t ibmvnic_features_check(struct sk_buff *skb,
3652                                                 struct net_device *dev,
3653                                                 netdev_features_t features)
3654 {
3655         /* Some backing hardware adapters can not
3656          * handle packets with a MSS less than 224
3657          * or with only one segment.
3658          */
3659         if (skb_is_gso(skb)) {
3660                 if (skb_shinfo(skb)->gso_size < 224 ||
3661                     skb_shinfo(skb)->gso_segs == 1)
3662                         features &= ~NETIF_F_GSO_MASK;
3663         }
3664
3665         return features;
3666 }
3667
/* net_device callbacks implemented by this driver. MTU changes go through
 * ibmvnic_change_mtu(), which applies them via a parameter-change reset.
 */
static const struct net_device_ops ibmvnic_netdev_ops = {
	.ndo_open		= ibmvnic_open,
	.ndo_stop		= ibmvnic_close,
	.ndo_start_xmit		= ibmvnic_xmit,
	.ndo_set_rx_mode	= ibmvnic_set_multi,
	.ndo_set_mac_address	= ibmvnic_set_mac,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_tx_timeout		= ibmvnic_tx_timeout,
	.ndo_change_mtu		= ibmvnic_change_mtu,
	.ndo_features_check	= ibmvnic_features_check,
};
3679
3680 /* ethtool functions */
3681
3682 static int ibmvnic_get_link_ksettings(struct net_device *netdev,
3683                                       struct ethtool_link_ksettings *cmd)
3684 {
3685         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3686         int rc;
3687
3688         rc = send_query_phys_parms(adapter);
3689         if (rc) {
3690                 adapter->speed = SPEED_UNKNOWN;
3691                 adapter->duplex = DUPLEX_UNKNOWN;
3692         }
3693         cmd->base.speed = adapter->speed;
3694         cmd->base.duplex = adapter->duplex;
3695         cmd->base.port = PORT_FIBRE;
3696         cmd->base.phy_address = 0;
3697         cmd->base.autoneg = AUTONEG_ENABLE;
3698
3699         return 0;
3700 }
3701
3702 static void ibmvnic_get_drvinfo(struct net_device *netdev,
3703                                 struct ethtool_drvinfo *info)
3704 {
3705         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3706
3707         strscpy(info->driver, ibmvnic_driver_name, sizeof(info->driver));
3708         strscpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version));
3709         strscpy(info->fw_version, adapter->fw_version,
3710                 sizeof(info->fw_version));
3711 }
3712
3713 static u32 ibmvnic_get_msglevel(struct net_device *netdev)
3714 {
3715         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3716
3717         return adapter->msg_enable;
3718 }
3719
3720 static void ibmvnic_set_msglevel(struct net_device *netdev, u32 data)
3721 {
3722         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3723
3724         adapter->msg_enable = data;
3725 }
3726
3727 static u32 ibmvnic_get_link(struct net_device *netdev)
3728 {
3729         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3730
3731         /* Don't need to send a query because we request a logical link up at
3732          * init and then we wait for link state indications
3733          */
3734         return adapter->logical_link_state;
3735 }
3736
3737 static void ibmvnic_get_ringparam(struct net_device *netdev,
3738                                   struct ethtool_ringparam *ring,
3739                                   struct kernel_ethtool_ringparam *kernel_ring,
3740                                   struct netlink_ext_ack *extack)
3741 {
3742         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3743
3744         ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
3745         ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
3746         ring->rx_mini_max_pending = 0;
3747         ring->rx_jumbo_max_pending = 0;
3748         ring->rx_pending = adapter->req_rx_add_entries_per_subcrq;
3749         ring->tx_pending = adapter->req_tx_entries_per_subcrq;
3750         ring->rx_mini_pending = 0;
3751         ring->rx_jumbo_pending = 0;
3752 }
3753
3754 static int ibmvnic_set_ringparam(struct net_device *netdev,
3755                                  struct ethtool_ringparam *ring,
3756                                  struct kernel_ethtool_ringparam *kernel_ring,
3757                                  struct netlink_ext_ack *extack)
3758 {
3759         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3760
3761         if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq  ||
3762             ring->tx_pending > adapter->max_tx_entries_per_subcrq) {
3763                 netdev_err(netdev, "Invalid request.\n");
3764                 netdev_err(netdev, "Max tx buffers = %llu\n",
3765                            adapter->max_rx_add_entries_per_subcrq);
3766                 netdev_err(netdev, "Max rx buffers = %llu\n",
3767                            adapter->max_tx_entries_per_subcrq);
3768                 return -EINVAL;
3769         }
3770
3771         adapter->desired.rx_entries = ring->rx_pending;
3772         adapter->desired.tx_entries = ring->tx_pending;
3773
3774         return wait_for_reset(adapter);
3775 }
3776
3777 static void ibmvnic_get_channels(struct net_device *netdev,
3778                                  struct ethtool_channels *channels)
3779 {
3780         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3781
3782         channels->max_rx = adapter->max_rx_queues;
3783         channels->max_tx = adapter->max_tx_queues;
3784         channels->max_other = 0;
3785         channels->max_combined = 0;
3786         channels->rx_count = adapter->req_rx_queues;
3787         channels->tx_count = adapter->req_tx_queues;
3788         channels->other_count = 0;
3789         channels->combined_count = 0;
3790 }
3791
3792 static int ibmvnic_set_channels(struct net_device *netdev,
3793                                 struct ethtool_channels *channels)
3794 {
3795         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
3796
3797         adapter->desired.rx_queues = channels->rx_count;
3798         adapter->desired.tx_queues = channels->tx_count;
3799
3800         return wait_for_reset(adapter);
3801 }
3802
3803 static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
3804 {
3805         struct ibmvnic_adapter *adapter = netdev_priv(dev);
3806         int i;
3807
3808         if (stringset != ETH_SS_STATS)
3809                 return;
3810
3811         for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++)
3812                 ethtool_puts(&data, ibmvnic_stats[i].name);
3813
3814         for (i = 0; i < adapter->req_tx_queues; i++) {
3815                 ethtool_sprintf(&data, "tx%d_batched_packets", i);
3816                 ethtool_sprintf(&data, "tx%d_direct_packets", i);
3817                 ethtool_sprintf(&data, "tx%d_bytes", i);
3818                 ethtool_sprintf(&data, "tx%d_dropped_packets", i);
3819         }
3820
3821         for (i = 0; i < adapter->req_rx_queues; i++) {
3822                 ethtool_sprintf(&data, "rx%d_packets", i);
3823                 ethtool_sprintf(&data, "rx%d_bytes", i);
3824                 ethtool_sprintf(&data, "rx%d_interrupts", i);
3825         }
3826 }
3827
3828 static int ibmvnic_get_sset_count(struct net_device *dev, int sset)
3829 {
3830         struct ibmvnic_adapter *adapter = netdev_priv(dev);
3831
3832         switch (sset) {
3833         case ETH_SS_STATS:
3834                 return ARRAY_SIZE(ibmvnic_stats) +
3835                        adapter->req_tx_queues * NUM_TX_STATS +
3836                        adapter->req_rx_queues * NUM_RX_STATS;
3837         default:
3838                 return -EOPNOTSUPP;
3839         }
3840 }
3841
3842 static void ibmvnic_get_ethtool_stats(struct net_device *dev,
3843                                       struct ethtool_stats *stats, u64 *data)
3844 {
3845         struct ibmvnic_adapter *adapter = netdev_priv(dev);
3846         union ibmvnic_crq crq;
3847         int i, j;
3848         int rc;
3849
3850         memset(&crq, 0, sizeof(crq));
3851         crq.request_statistics.first = IBMVNIC_CRQ_CMD;
3852         crq.request_statistics.cmd = REQUEST_STATISTICS;
3853         crq.request_statistics.ioba = cpu_to_be32(adapter->stats_token);
3854         crq.request_statistics.len =
3855             cpu_to_be32(sizeof(struct ibmvnic_statistics));
3856
3857         /* Wait for data to be written */
3858         reinit_completion(&adapter->stats_done);
3859         rc = ibmvnic_send_crq(adapter, &crq);
3860         if (rc)
3861                 return;
3862         rc = ibmvnic_wait_for_completion(adapter, &adapter->stats_done, 10000);
3863         if (rc)
3864                 return;
3865
3866         for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++)
3867                 data[i] = be64_to_cpu(IBMVNIC_GET_STAT
3868                                       (adapter, ibmvnic_stats[i].offset));
3869
3870         for (j = 0; j < adapter->req_tx_queues; j++) {
3871                 data[i] = adapter->tx_stats_buffers[j].batched_packets;
3872                 i++;
3873                 data[i] = adapter->tx_stats_buffers[j].direct_packets;
3874                 i++;
3875                 data[i] = adapter->tx_stats_buffers[j].bytes;
3876                 i++;
3877                 data[i] = adapter->tx_stats_buffers[j].dropped_packets;
3878                 i++;
3879         }
3880
3881         for (j = 0; j < adapter->req_rx_queues; j++) {
3882                 data[i] = adapter->rx_stats_buffers[j].packets;
3883                 i++;
3884                 data[i] = adapter->rx_stats_buffers[j].bytes;
3885                 i++;
3886                 data[i] = adapter->rx_stats_buffers[j].interrupts;
3887                 i++;
3888         }
3889 }
3890
3891 static const struct ethtool_ops ibmvnic_ethtool_ops = {
3892         .get_drvinfo            = ibmvnic_get_drvinfo,
3893         .get_msglevel           = ibmvnic_get_msglevel,
3894         .set_msglevel           = ibmvnic_set_msglevel,
3895         .get_link               = ibmvnic_get_link,
3896         .get_ringparam          = ibmvnic_get_ringparam,
3897         .set_ringparam          = ibmvnic_set_ringparam,
3898         .get_channels           = ibmvnic_get_channels,
3899         .set_channels           = ibmvnic_set_channels,
3900         .get_strings            = ibmvnic_get_strings,
3901         .get_sset_count         = ibmvnic_get_sset_count,
3902         .get_ethtool_stats      = ibmvnic_get_ethtool_stats,
3903         .get_link_ksettings     = ibmvnic_get_link_ksettings,
3904 };
3905
3906 /* Routines for managing CRQs/sCRQs  */
3907
3908 static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter,
3909                                    struct ibmvnic_sub_crq_queue *scrq)
3910 {
3911         int rc;
3912
3913         if (!scrq) {
3914                 netdev_dbg(adapter->netdev, "Invalid scrq reset.\n");
3915                 return -EINVAL;
3916         }
3917
3918         if (scrq->irq) {
3919                 free_irq(scrq->irq, scrq);
3920                 irq_dispose_mapping(scrq->irq);
3921                 scrq->irq = 0;
3922         }
3923
3924         if (scrq->msgs) {
3925                 memset(scrq->msgs, 0, 4 * PAGE_SIZE);
3926                 atomic_set(&scrq->used, 0);
3927                 scrq->cur = 0;
3928                 scrq->ind_buf.index = 0;
3929         } else {
3930                 netdev_dbg(adapter->netdev, "Invalid scrq reset\n");
3931                 return -EINVAL;
3932         }
3933
3934         rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token,
3935                            4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq);
3936         return rc;
3937 }
3938
3939 static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter)
3940 {
3941         int i, rc;
3942
3943         if (!adapter->tx_scrq || !adapter->rx_scrq)
3944                 return -EINVAL;
3945
3946         ibmvnic_clean_affinity(adapter);
3947
3948         for (i = 0; i < adapter->req_tx_queues; i++) {
3949                 netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i);
3950                 rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]);
3951                 if (rc)
3952                         return rc;
3953         }
3954
3955         for (i = 0; i < adapter->req_rx_queues; i++) {
3956                 netdev_dbg(adapter->netdev, "Re-setting rx_scrq[%d]\n", i);
3957                 rc = reset_one_sub_crq_queue(adapter, adapter->rx_scrq[i]);
3958                 if (rc)
3959                         return rc;
3960         }
3961
3962         return rc;
3963 }
3964
3965 static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
3966                                   struct ibmvnic_sub_crq_queue *scrq,
3967                                   bool do_h_free)
3968 {
3969         struct device *dev = &adapter->vdev->dev;
3970         long rc;
3971
3972         netdev_dbg(adapter->netdev, "Releasing sub-CRQ\n");
3973
3974         if (do_h_free) {
3975                 /* Close the sub-crqs */
3976                 do {
3977                         rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
3978                                                 adapter->vdev->unit_address,
3979                                                 scrq->crq_num);
3980                 } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
3981
3982                 if (rc) {
3983                         netdev_err(adapter->netdev,
3984                                    "Failed to release sub-CRQ %16lx, rc = %ld\n",
3985                                    scrq->crq_num, rc);
3986                 }
3987         }
3988
3989         dma_free_coherent(dev,
3990                           IBMVNIC_IND_ARR_SZ,
3991                           scrq->ind_buf.indir_arr,
3992                           scrq->ind_buf.indir_dma);
3993
3994         dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
3995                          DMA_BIDIRECTIONAL);
3996         free_pages((unsigned long)scrq->msgs, 2);
3997         free_cpumask_var(scrq->affinity_mask);
3998         kfree(scrq);
3999 }
4000
4001 static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
4002                                                         *adapter)
4003 {
4004         struct device *dev = &adapter->vdev->dev;
4005         struct ibmvnic_sub_crq_queue *scrq;
4006         int rc;
4007
4008         scrq = kzalloc(sizeof(*scrq), GFP_KERNEL);
4009         if (!scrq)
4010                 return NULL;
4011
4012         scrq->msgs =
4013                 (union sub_crq *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 2);
4014         if (!scrq->msgs) {
4015                 dev_warn(dev, "Couldn't allocate crq queue messages page\n");
4016                 goto zero_page_failed;
4017         }
4018         if (!zalloc_cpumask_var(&scrq->affinity_mask, GFP_KERNEL))
4019                 goto cpumask_alloc_failed;
4020
4021         scrq->msg_token = dma_map_single(dev, scrq->msgs, 4 * PAGE_SIZE,
4022                                          DMA_BIDIRECTIONAL);
4023         if (dma_mapping_error(dev, scrq->msg_token)) {
4024                 dev_warn(dev, "Couldn't map crq queue messages page\n");
4025                 goto map_failed;
4026         }
4027
4028         rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token,
4029                            4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq);
4030
4031         if (rc == H_RESOURCE)
4032                 rc = ibmvnic_reset_crq(adapter);
4033
4034         if (rc == H_CLOSED) {
4035                 dev_warn(dev, "Partner adapter not ready, waiting.\n");
4036         } else if (rc) {
4037                 dev_warn(dev, "Error %d registering sub-crq\n", rc);
4038                 goto reg_failed;
4039         }
4040
4041         scrq->adapter = adapter;
4042         scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
4043         scrq->ind_buf.index = 0;
4044
4045         scrq->ind_buf.indir_arr =
4046                 dma_alloc_coherent(dev,
4047                                    IBMVNIC_IND_ARR_SZ,
4048                                    &scrq->ind_buf.indir_dma,
4049                                    GFP_KERNEL);
4050
4051         if (!scrq->ind_buf.indir_arr)
4052                 goto indir_failed;
4053
4054         spin_lock_init(&scrq->lock);
4055
4056         netdev_dbg(adapter->netdev,
4057                    "sub-crq initialized, num %lx, hw_irq=%lx, irq=%x\n",
4058                    scrq->crq_num, scrq->hw_irq, scrq->irq);
4059
4060         return scrq;
4061
4062 indir_failed:
4063         do {
4064                 rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
4065                                         adapter->vdev->unit_address,
4066                                         scrq->crq_num);
4067         } while (rc == H_BUSY || rc == H_IS_LONG_BUSY(rc));
4068 reg_failed:
4069         dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
4070                          DMA_BIDIRECTIONAL);
4071 map_failed:
4072         free_cpumask_var(scrq->affinity_mask);
4073 cpumask_alloc_failed:
4074         free_pages((unsigned long)scrq->msgs, 2);
4075 zero_page_failed:
4076         kfree(scrq);
4077
4078         return NULL;
4079 }
4080
4081 static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
4082 {
4083         int i;
4084
4085         ibmvnic_clean_affinity(adapter);
4086         if (adapter->tx_scrq) {
4087                 for (i = 0; i < adapter->num_active_tx_scrqs; i++) {
4088                         if (!adapter->tx_scrq[i])
4089                                 continue;
4090
4091                         netdev_dbg(adapter->netdev, "Releasing tx_scrq[%d]\n",
4092                                    i);
4093                         ibmvnic_tx_scrq_clean_buffer(adapter, adapter->tx_scrq[i]);
4094                         if (adapter->tx_scrq[i]->irq) {
4095                                 free_irq(adapter->tx_scrq[i]->irq,
4096                                          adapter->tx_scrq[i]);
4097                                 irq_dispose_mapping(adapter->tx_scrq[i]->irq);
4098                                 adapter->tx_scrq[i]->irq = 0;
4099                         }
4100
4101                         release_sub_crq_queue(adapter, adapter->tx_scrq[i],
4102                                               do_h_free);
4103                 }
4104
4105                 kfree(adapter->tx_scrq);
4106                 adapter->tx_scrq = NULL;
4107                 adapter->num_active_tx_scrqs = 0;
4108         }
4109
4110         /* Clean any remaining outstanding SKBs
4111          * we freed the irq so we won't be hearing
4112          * from them
4113          */
4114         clean_tx_pools(adapter);
4115
4116         if (adapter->rx_scrq) {
4117                 for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
4118                         if (!adapter->rx_scrq[i])
4119                                 continue;
4120
4121                         netdev_dbg(adapter->netdev, "Releasing rx_scrq[%d]\n",
4122                                    i);
4123                         if (adapter->rx_scrq[i]->irq) {
4124                                 free_irq(adapter->rx_scrq[i]->irq,
4125                                          adapter->rx_scrq[i]);
4126                                 irq_dispose_mapping(adapter->rx_scrq[i]->irq);
4127                                 adapter->rx_scrq[i]->irq = 0;
4128                         }
4129
4130                         release_sub_crq_queue(adapter, adapter->rx_scrq[i],
4131                                               do_h_free);
4132                 }
4133
4134                 kfree(adapter->rx_scrq);
4135                 adapter->rx_scrq = NULL;
4136                 adapter->num_active_rx_scrqs = 0;
4137         }
4138 }
4139
4140 static int disable_scrq_irq(struct ibmvnic_adapter *adapter,
4141                             struct ibmvnic_sub_crq_queue *scrq)
4142 {
4143         struct device *dev = &adapter->vdev->dev;
4144         unsigned long rc;
4145
4146         rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
4147                                 H_DISABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0);
4148         if (rc)
4149                 dev_err(dev, "Couldn't disable scrq irq 0x%lx. rc=%ld\n",
4150                         scrq->hw_irq, rc);
4151         return rc;
4152 }
4153
4154 /* We can not use the IRQ chip EOI handler because that has the
4155  * unintended effect of changing the interrupt priority.
4156  */
4157 static void ibmvnic_xics_eoi(struct device *dev, struct ibmvnic_sub_crq_queue *scrq)
4158 {
4159         u64 val = 0xff000000 | scrq->hw_irq;
4160         unsigned long rc;
4161
4162         rc = plpar_hcall_norets(H_EOI, val);
4163         if (rc)
4164                 dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", val, rc);
4165 }
4166
/* Due to a firmware bug, the hypervisor can send an interrupt to a
 * transmit or receive queue just prior to a partition migration.
 * Force an EOI after migration. Nothing is done when XIVE is enabled;
 * otherwise the EOI is issued through ibmvnic_xics_eoi().
 */
static void ibmvnic_clear_pending_interrupt(struct device *dev,
					    struct ibmvnic_sub_crq_queue *scrq)
{
	if (xive_enabled())
		return;

	ibmvnic_xics_eoi(dev, scrq);
}
4177
4178 static int enable_scrq_irq(struct ibmvnic_adapter *adapter,
4179                            struct ibmvnic_sub_crq_queue *scrq)
4180 {
4181         struct device *dev = &adapter->vdev->dev;
4182         unsigned long rc;
4183
4184         if (scrq->hw_irq > 0x100000000ULL) {
4185                 dev_err(dev, "bad hw_irq = %lx\n", scrq->hw_irq);
4186                 return 1;
4187         }
4188
4189         if (test_bit(0, &adapter->resetting) &&
4190             adapter->reset_reason == VNIC_RESET_MOBILITY) {
4191                 ibmvnic_clear_pending_interrupt(dev, scrq);
4192         }
4193
4194         rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
4195                                 H_ENABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0);
4196         if (rc)
4197                 dev_err(dev, "Couldn't enable scrq irq 0x%lx. rc=%ld\n",
4198                         scrq->hw_irq, rc);
4199         return rc;
4200 }
4201
4202 static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
4203                                struct ibmvnic_sub_crq_queue *scrq)
4204 {
4205         struct device *dev = &adapter->vdev->dev;
4206         int num_packets = 0, total_bytes = 0;
4207         struct ibmvnic_tx_pool *tx_pool;
4208         struct ibmvnic_tx_buff *txbuff;
4209         struct netdev_queue *txq;
4210         union sub_crq *next;
4211         int index, i;
4212
4213 restart_loop:
4214         while (pending_scrq(adapter, scrq)) {
4215                 unsigned int pool = scrq->pool_index;
4216                 int num_entries = 0;
4217                 next = ibmvnic_next_scrq(adapter, scrq);
4218                 for (i = 0; i < next->tx_comp.num_comps; i++) {
4219                         index = be32_to_cpu(next->tx_comp.correlators[i]);
4220                         if (index & IBMVNIC_TSO_POOL_MASK) {
4221                                 tx_pool = &adapter->tso_pool[pool];
4222                                 index &= ~IBMVNIC_TSO_POOL_MASK;
4223                         } else {
4224                                 tx_pool = &adapter->tx_pool[pool];
4225                         }
4226
4227                         txbuff = &tx_pool->tx_buff[index];
4228                         num_packets++;
4229                         num_entries += txbuff->num_entries;
4230                         if (txbuff->skb) {
4231                                 total_bytes += txbuff->skb->len;
4232                                 if (next->tx_comp.rcs[i]) {
4233                                         dev_err(dev, "tx error %x\n",
4234                                                 next->tx_comp.rcs[i]);
4235                                         dev_kfree_skb_irq(txbuff->skb);
4236                                 } else {
4237                                         dev_consume_skb_irq(txbuff->skb);
4238                                 }
4239                                 txbuff->skb = NULL;
4240                         } else {
4241                                 netdev_warn(adapter->netdev,
4242                                             "TX completion received with NULL socket buffer\n");
4243                         }
4244                         tx_pool->free_map[tx_pool->producer_index] = index;
4245                         tx_pool->producer_index =
4246                                 (tx_pool->producer_index + 1) %
4247                                         tx_pool->num_buffers;
4248                 }
4249                 /* remove tx_comp scrq*/
4250                 next->tx_comp.first = 0;
4251
4252
4253                 if (atomic_sub_return(num_entries, &scrq->used) <=
4254                     (adapter->req_tx_entries_per_subcrq / 2) &&
4255                     __netif_subqueue_stopped(adapter->netdev,
4256                                              scrq->pool_index)) {
4257                         rcu_read_lock();
4258                         if (adapter->tx_queues_active) {
4259                                 netif_wake_subqueue(adapter->netdev,
4260                                                     scrq->pool_index);
4261                                 netdev_dbg(adapter->netdev,
4262                                            "Started queue %d\n",
4263                                            scrq->pool_index);
4264                         }
4265                         rcu_read_unlock();
4266                 }
4267         }
4268
4269         enable_scrq_irq(adapter, scrq);
4270
4271         if (pending_scrq(adapter, scrq)) {
4272                 disable_scrq_irq(adapter, scrq);
4273                 goto restart_loop;
4274         }
4275
4276         txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index);
4277         netdev_tx_completed_queue(txq, num_packets, total_bytes);
4278
4279         return 0;
4280 }
4281
4282 static irqreturn_t ibmvnic_interrupt_tx(int irq, void *instance)
4283 {
4284         struct ibmvnic_sub_crq_queue *scrq = instance;
4285         struct ibmvnic_adapter *adapter = scrq->adapter;
4286
4287         disable_scrq_irq(adapter, scrq);
4288         ibmvnic_complete_tx(adapter, scrq);
4289
4290         return IRQ_HANDLED;
4291 }
4292
4293 static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance)
4294 {
4295         struct ibmvnic_sub_crq_queue *scrq = instance;
4296         struct ibmvnic_adapter *adapter = scrq->adapter;
4297
4298         /* When booting a kdump kernel we can hit pending interrupts
4299          * prior to completing driver initialization.
4300          */
4301         if (unlikely(adapter->state != VNIC_OPEN))
4302                 return IRQ_NONE;
4303
4304         adapter->rx_stats_buffers[scrq->scrq_num].interrupts++;
4305
4306         if (napi_schedule_prep(&adapter->napi[scrq->scrq_num])) {
4307                 disable_scrq_irq(adapter, scrq);
4308                 __napi_schedule(&adapter->napi[scrq->scrq_num]);
4309         }
4310
4311         return IRQ_HANDLED;
4312 }
4313
4314 static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
4315 {
4316         struct device *dev = &adapter->vdev->dev;
4317         struct ibmvnic_sub_crq_queue *scrq;
4318         int i = 0, j = 0;
4319         int rc = 0;
4320
4321         for (i = 0; i < adapter->req_tx_queues; i++) {
4322                 netdev_dbg(adapter->netdev, "Initializing tx_scrq[%d] irq\n",
4323                            i);
4324                 scrq = adapter->tx_scrq[i];
4325                 scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
4326
4327                 if (!scrq->irq) {
4328                         rc = -EINVAL;
4329                         dev_err(dev, "Error mapping irq\n");
4330                         goto req_tx_irq_failed;
4331                 }
4332
4333                 snprintf(scrq->name, sizeof(scrq->name), "ibmvnic-%x-tx%d",
4334                          adapter->vdev->unit_address, i);
4335                 rc = request_irq(scrq->irq, ibmvnic_interrupt_tx,
4336                                  0, scrq->name, scrq);
4337
4338                 if (rc) {
4339                         dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n",
4340                                 scrq->irq, rc);
4341                         irq_dispose_mapping(scrq->irq);
4342                         goto req_tx_irq_failed;
4343                 }
4344         }
4345
4346         for (i = 0; i < adapter->req_rx_queues; i++) {
4347                 netdev_dbg(adapter->netdev, "Initializing rx_scrq[%d] irq\n",
4348                            i);
4349                 scrq = adapter->rx_scrq[i];
4350                 scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
4351                 if (!scrq->irq) {
4352                         rc = -EINVAL;
4353                         dev_err(dev, "Error mapping irq\n");
4354                         goto req_rx_irq_failed;
4355                 }
4356                 snprintf(scrq->name, sizeof(scrq->name), "ibmvnic-%x-rx%d",
4357                          adapter->vdev->unit_address, i);
4358                 rc = request_irq(scrq->irq, ibmvnic_interrupt_rx,
4359                                  0, scrq->name, scrq);
4360                 if (rc) {
4361                         dev_err(dev, "Couldn't register rx irq 0x%x. rc=%d\n",
4362                                 scrq->irq, rc);
4363                         irq_dispose_mapping(scrq->irq);
4364                         goto req_rx_irq_failed;
4365                 }
4366         }
4367
4368         cpus_read_lock();
4369         ibmvnic_set_affinity(adapter);
4370         cpus_read_unlock();
4371
4372         return rc;
4373
4374 req_rx_irq_failed:
4375         for (j = 0; j < i; j++) {
4376                 free_irq(adapter->rx_scrq[j]->irq, adapter->rx_scrq[j]);
4377                 irq_dispose_mapping(adapter->rx_scrq[j]->irq);
4378         }
4379         i = adapter->req_tx_queues;
4380 req_tx_irq_failed:
4381         for (j = 0; j < i; j++) {
4382                 free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
4383                 irq_dispose_mapping(adapter->tx_scrq[j]->irq);
4384         }
4385         release_sub_crqs(adapter, 1);
4386         return rc;
4387 }
4388
4389 static int init_sub_crqs(struct ibmvnic_adapter *adapter)
4390 {
4391         struct device *dev = &adapter->vdev->dev;
4392         struct ibmvnic_sub_crq_queue **allqueues;
4393         int registered_queues = 0;
4394         int total_queues;
4395         int more = 0;
4396         int i;
4397
4398         total_queues = adapter->req_tx_queues + adapter->req_rx_queues;
4399
4400         allqueues = kcalloc(total_queues, sizeof(*allqueues), GFP_KERNEL);
4401         if (!allqueues)
4402                 return -ENOMEM;
4403
4404         for (i = 0; i < total_queues; i++) {
4405                 allqueues[i] = init_sub_crq_queue(adapter);
4406                 if (!allqueues[i]) {
4407                         dev_warn(dev, "Couldn't allocate all sub-crqs\n");
4408                         break;
4409                 }
4410                 registered_queues++;
4411         }
4412
4413         /* Make sure we were able to register the minimum number of queues */
4414         if (registered_queues <
4415             adapter->min_tx_queues + adapter->min_rx_queues) {
4416                 dev_err(dev, "Fatal: Couldn't init  min number of sub-crqs\n");
4417                 goto tx_failed;
4418         }
4419
4420         /* Distribute the failed allocated queues*/
4421         for (i = 0; i < total_queues - registered_queues + more ; i++) {
4422                 netdev_dbg(adapter->netdev, "Reducing number of queues\n");
4423                 switch (i % 3) {
4424                 case 0:
4425                         if (adapter->req_rx_queues > adapter->min_rx_queues)
4426                                 adapter->req_rx_queues--;
4427                         else
4428                                 more++;
4429                         break;
4430                 case 1:
4431                         if (adapter->req_tx_queues > adapter->min_tx_queues)
4432                                 adapter->req_tx_queues--;
4433                         else
4434                                 more++;
4435                         break;
4436                 }
4437         }
4438
4439         adapter->tx_scrq = kcalloc(adapter->req_tx_queues,
4440                                    sizeof(*adapter->tx_scrq), GFP_KERNEL);
4441         if (!adapter->tx_scrq)
4442                 goto tx_failed;
4443
4444         for (i = 0; i < adapter->req_tx_queues; i++) {
4445                 adapter->tx_scrq[i] = allqueues[i];
4446                 adapter->tx_scrq[i]->pool_index = i;
4447                 adapter->num_active_tx_scrqs++;
4448         }
4449
4450         adapter->rx_scrq = kcalloc(adapter->req_rx_queues,
4451                                    sizeof(*adapter->rx_scrq), GFP_KERNEL);
4452         if (!adapter->rx_scrq)
4453                 goto rx_failed;
4454
4455         for (i = 0; i < adapter->req_rx_queues; i++) {
4456                 adapter->rx_scrq[i] = allqueues[i + adapter->req_tx_queues];
4457                 adapter->rx_scrq[i]->scrq_num = i;
4458                 adapter->num_active_rx_scrqs++;
4459         }
4460
4461         kfree(allqueues);
4462         return 0;
4463
4464 rx_failed:
4465         kfree(adapter->tx_scrq);
4466         adapter->tx_scrq = NULL;
4467 tx_failed:
4468         for (i = 0; i < registered_queues; i++)
4469                 release_sub_crq_queue(adapter, allqueues[i], 1);
4470         kfree(allqueues);
4471         return -ENOMEM;
4472 }
4473
4474 static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
4475 {
4476         struct device *dev = &adapter->vdev->dev;
4477         union ibmvnic_crq crq;
4478         int max_entries;
4479         int cap_reqs;
4480
4481         /* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on
4482          * the PROMISC flag). Initialize this count upfront. When the tasklet
4483          * receives a response to all of these, it will send the next protocol
4484          * message (QUERY_IP_OFFLOAD).
4485          */
4486         if (!(adapter->netdev->flags & IFF_PROMISC) ||
4487             adapter->promisc_supported)
4488                 cap_reqs = 7;
4489         else
4490                 cap_reqs = 6;
4491
4492         if (!retry) {
4493                 /* Sub-CRQ entries are 32 byte long */
4494                 int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4);
4495
4496                 atomic_set(&adapter->running_cap_crqs, cap_reqs);
4497
4498                 if (adapter->min_tx_entries_per_subcrq > entries_page ||
4499                     adapter->min_rx_add_entries_per_subcrq > entries_page) {
4500                         dev_err(dev, "Fatal, invalid entries per sub-crq\n");
4501                         return;
4502                 }
4503
4504                 if (adapter->desired.mtu)
4505                         adapter->req_mtu = adapter->desired.mtu;
4506                 else
4507                         adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN;
4508
4509                 if (!adapter->desired.tx_entries)
4510                         adapter->desired.tx_entries =
4511                                         adapter->max_tx_entries_per_subcrq;
4512                 if (!adapter->desired.rx_entries)
4513                         adapter->desired.rx_entries =
4514                                         adapter->max_rx_add_entries_per_subcrq;
4515
4516                 max_entries = IBMVNIC_LTB_SET_SIZE /
4517                               (adapter->req_mtu + IBMVNIC_BUFFER_HLEN);
4518
4519                 if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) *
4520                         adapter->desired.tx_entries > IBMVNIC_LTB_SET_SIZE) {
4521                         adapter->desired.tx_entries = max_entries;
4522                 }
4523
4524                 if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) *
4525                         adapter->desired.rx_entries > IBMVNIC_LTB_SET_SIZE) {
4526                         adapter->desired.rx_entries = max_entries;
4527                 }
4528
4529                 if (adapter->desired.tx_entries)
4530                         adapter->req_tx_entries_per_subcrq =
4531                                         adapter->desired.tx_entries;
4532                 else
4533                         adapter->req_tx_entries_per_subcrq =
4534                                         adapter->max_tx_entries_per_subcrq;
4535
4536                 if (adapter->desired.rx_entries)
4537                         adapter->req_rx_add_entries_per_subcrq =
4538                                         adapter->desired.rx_entries;
4539                 else
4540                         adapter->req_rx_add_entries_per_subcrq =
4541                                         adapter->max_rx_add_entries_per_subcrq;
4542
4543                 if (adapter->desired.tx_queues)
4544                         adapter->req_tx_queues =
4545                                         adapter->desired.tx_queues;
4546                 else
4547                         adapter->req_tx_queues =
4548                                         adapter->opt_tx_comp_sub_queues;
4549
4550                 if (adapter->desired.rx_queues)
4551                         adapter->req_rx_queues =
4552                                         adapter->desired.rx_queues;
4553                 else
4554                         adapter->req_rx_queues =
4555                                         adapter->opt_rx_comp_queues;
4556
4557                 adapter->req_rx_add_queues = adapter->max_rx_add_queues;
4558         } else {
4559                 atomic_add(cap_reqs, &adapter->running_cap_crqs);
4560         }
4561         memset(&crq, 0, sizeof(crq));
4562         crq.request_capability.first = IBMVNIC_CRQ_CMD;
4563         crq.request_capability.cmd = REQUEST_CAPABILITY;
4564
4565         crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES);
4566         crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues);
4567         cap_reqs--;
4568         ibmvnic_send_crq(adapter, &crq);
4569
4570         crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES);
4571         crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues);
4572         cap_reqs--;
4573         ibmvnic_send_crq(adapter, &crq);
4574
4575         crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES);
4576         crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues);
4577         cap_reqs--;
4578         ibmvnic_send_crq(adapter, &crq);
4579
4580         crq.request_capability.capability =
4581             cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ);
4582         crq.request_capability.number =
4583             cpu_to_be64(adapter->req_tx_entries_per_subcrq);
4584         cap_reqs--;
4585         ibmvnic_send_crq(adapter, &crq);
4586
4587         crq.request_capability.capability =
4588             cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ);
4589         crq.request_capability.number =
4590             cpu_to_be64(adapter->req_rx_add_entries_per_subcrq);
4591         cap_reqs--;
4592         ibmvnic_send_crq(adapter, &crq);
4593
4594         crq.request_capability.capability = cpu_to_be16(REQ_MTU);
4595         crq.request_capability.number = cpu_to_be64(adapter->req_mtu);
4596         cap_reqs--;
4597         ibmvnic_send_crq(adapter, &crq);
4598
4599         if (adapter->netdev->flags & IFF_PROMISC) {
4600                 if (adapter->promisc_supported) {
4601                         crq.request_capability.capability =
4602                             cpu_to_be16(PROMISC_REQUESTED);
4603                         crq.request_capability.number = cpu_to_be64(1);
4604                         cap_reqs--;
4605                         ibmvnic_send_crq(adapter, &crq);
4606                 }
4607         } else {
4608                 crq.request_capability.capability =
4609                     cpu_to_be16(PROMISC_REQUESTED);
4610                 crq.request_capability.number = cpu_to_be64(0);
4611                 cap_reqs--;
4612                 ibmvnic_send_crq(adapter, &crq);
4613         }
4614
4615         /* Keep at end to catch any discrepancy between expected and actual
4616          * CRQs sent.
4617          */
4618         WARN_ON(cap_reqs != 0);
4619 }
4620
4621 static int pending_scrq(struct ibmvnic_adapter *adapter,
4622                         struct ibmvnic_sub_crq_queue *scrq)
4623 {
4624         union sub_crq *entry = &scrq->msgs[scrq->cur];
4625         int rc;
4626
4627         rc = !!(entry->generic.first & IBMVNIC_CRQ_CMD_RSP);
4628
4629         /* Ensure that the SCRQ valid flag is loaded prior to loading the
4630          * contents of the SCRQ descriptor
4631          */
4632         dma_rmb();
4633
4634         return rc;
4635 }
4636
4637 static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter,
4638                                         struct ibmvnic_sub_crq_queue *scrq)
4639 {
4640         union sub_crq *entry;
4641         unsigned long flags;
4642
4643         spin_lock_irqsave(&scrq->lock, flags);
4644         entry = &scrq->msgs[scrq->cur];
4645         if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP) {
4646                 if (++scrq->cur == scrq->size)
4647                         scrq->cur = 0;
4648         } else {
4649                 entry = NULL;
4650         }
4651         spin_unlock_irqrestore(&scrq->lock, flags);
4652
4653         /* Ensure that the SCRQ valid flag is loaded prior to loading the
4654          * contents of the SCRQ descriptor
4655          */
4656         dma_rmb();
4657
4658         return entry;
4659 }
4660
4661 static union ibmvnic_crq *ibmvnic_next_crq(struct ibmvnic_adapter *adapter)
4662 {
4663         struct ibmvnic_crq_queue *queue = &adapter->crq;
4664         union ibmvnic_crq *crq;
4665
4666         crq = &queue->msgs[queue->cur];
4667         if (crq->generic.first & IBMVNIC_CRQ_CMD_RSP) {
4668                 if (++queue->cur == queue->size)
4669                         queue->cur = 0;
4670         } else {
4671                 crq = NULL;
4672         }
4673
4674         return crq;
4675 }
4676
4677 static void print_subcrq_error(struct device *dev, int rc, const char *func)
4678 {
4679         switch (rc) {
4680         case H_PARAMETER:
4681                 dev_warn_ratelimited(dev,
4682                                      "%s failed: Send request is malformed or adapter failover pending. (rc=%d)\n",
4683                                      func, rc);
4684                 break;
4685         case H_CLOSED:
4686                 dev_warn_ratelimited(dev,
4687                                      "%s failed: Backing queue closed. Adapter is down or failover pending. (rc=%d)\n",
4688                                      func, rc);
4689                 break;
4690         default:
4691                 dev_err_ratelimited(dev, "%s failed: (rc=%d)\n", func, rc);
4692                 break;
4693         }
4694 }
4695
4696 static int send_subcrq_indirect(struct ibmvnic_adapter *adapter,
4697                                 u64 remote_handle, u64 ioba, u64 num_entries)
4698 {
4699         unsigned int ua = adapter->vdev->unit_address;
4700         struct device *dev = &adapter->vdev->dev;
4701         int rc;
4702
4703         /* Make sure the hypervisor sees the complete request */
4704         dma_wmb();
4705         rc = plpar_hcall_norets(H_SEND_SUB_CRQ_INDIRECT, ua,
4706                                 cpu_to_be64(remote_handle),
4707                                 ioba, num_entries);
4708
4709         if (rc)
4710                 print_subcrq_error(dev, rc, __func__);
4711
4712         return rc;
4713 }
4714
4715 static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter,
4716                             union ibmvnic_crq *crq)
4717 {
4718         unsigned int ua = adapter->vdev->unit_address;
4719         struct device *dev = &adapter->vdev->dev;
4720         u64 *u64_crq = (u64 *)crq;
4721         int rc;
4722
4723         netdev_dbg(adapter->netdev, "Sending CRQ: %016lx %016lx\n",
4724                    (unsigned long)cpu_to_be64(u64_crq[0]),
4725                    (unsigned long)cpu_to_be64(u64_crq[1]));
4726
4727         if (!adapter->crq.active &&
4728             crq->generic.first != IBMVNIC_CRQ_INIT_CMD) {
4729                 dev_warn(dev, "Invalid request detected while CRQ is inactive, possible device state change during reset\n");
4730                 return -EINVAL;
4731         }
4732
4733         /* Make sure the hypervisor sees the complete request */
4734         dma_wmb();
4735
4736         rc = plpar_hcall_norets(H_SEND_CRQ, ua,
4737                                 cpu_to_be64(u64_crq[0]),
4738                                 cpu_to_be64(u64_crq[1]));
4739
4740         if (rc) {
4741                 if (rc == H_CLOSED) {
4742                         dev_warn(dev, "CRQ Queue closed\n");
4743                         /* do not reset, report the fail, wait for passive init from server */
4744                 }
4745
4746                 dev_warn(dev, "Send error (rc=%d)\n", rc);
4747         }
4748
4749         return rc;
4750 }
4751
4752 static int ibmvnic_send_crq_init(struct ibmvnic_adapter *adapter)
4753 {
4754         struct device *dev = &adapter->vdev->dev;
4755         union ibmvnic_crq crq;
4756         int retries = 100;
4757         int rc;
4758
4759         memset(&crq, 0, sizeof(crq));
4760         crq.generic.first = IBMVNIC_CRQ_INIT_CMD;
4761         crq.generic.cmd = IBMVNIC_CRQ_INIT;
4762         netdev_dbg(adapter->netdev, "Sending CRQ init\n");
4763
4764         do {
4765                 rc = ibmvnic_send_crq(adapter, &crq);
4766                 if (rc != H_CLOSED)
4767                         break;
4768                 retries--;
4769                 msleep(50);
4770
4771         } while (retries > 0);
4772
4773         if (rc) {
4774                 dev_err(dev, "Failed to send init request, rc = %d\n", rc);
4775                 return rc;
4776         }
4777
4778         return 0;
4779 }
4780
4781 struct vnic_login_client_data {
4782         u8      type;
4783         __be16  len;
4784         char    name[];
4785 } __packed;
4786
4787 static int vnic_client_data_len(struct ibmvnic_adapter *adapter)
4788 {
4789         int len;
4790
4791         /* Calculate the amount of buffer space needed for the
4792          * vnic client data in the login buffer. There are four entries,
4793          * OS name, LPAR name, device name, and a null last entry.
4794          */
4795         len = 4 * sizeof(struct vnic_login_client_data);
4796         len += 6; /* "Linux" plus NULL */
4797         len += strlen(utsname()->nodename) + 1;
4798         len += strlen(adapter->netdev->name) + 1;
4799
4800         return len;
4801 }
4802
4803 static void vnic_add_client_data(struct ibmvnic_adapter *adapter,
4804                                  struct vnic_login_client_data *vlcd)
4805 {
4806         const char *os_name = "Linux";
4807         int len;
4808
4809         /* Type 1 - LPAR OS */
4810         vlcd->type = 1;
4811         len = strlen(os_name) + 1;
4812         vlcd->len = cpu_to_be16(len);
4813         strscpy(vlcd->name, os_name, len);
4814         vlcd = (struct vnic_login_client_data *)(vlcd->name + len);
4815
4816         /* Type 2 - LPAR name */
4817         vlcd->type = 2;
4818         len = strlen(utsname()->nodename) + 1;
4819         vlcd->len = cpu_to_be16(len);
4820         strscpy(vlcd->name, utsname()->nodename, len);
4821         vlcd = (struct vnic_login_client_data *)(vlcd->name + len);
4822
4823         /* Type 3 - device name */
4824         vlcd->type = 3;
4825         len = strlen(adapter->netdev->name) + 1;
4826         vlcd->len = cpu_to_be16(len);
4827         strscpy(vlcd->name, adapter->netdev->name, len);
4828 }
4829
4830 static int send_login(struct ibmvnic_adapter *adapter)
4831 {
4832         struct ibmvnic_login_rsp_buffer *login_rsp_buffer;
4833         struct ibmvnic_login_buffer *login_buffer;
4834         struct device *dev = &adapter->vdev->dev;
4835         struct vnic_login_client_data *vlcd;
4836         dma_addr_t rsp_buffer_token;
4837         dma_addr_t buffer_token;
4838         size_t rsp_buffer_size;
4839         union ibmvnic_crq crq;
4840         int client_data_len;
4841         size_t buffer_size;
4842         __be64 *tx_list_p;
4843         __be64 *rx_list_p;
4844         int rc;
4845         int i;
4846
4847         if (!adapter->tx_scrq || !adapter->rx_scrq) {
4848                 netdev_err(adapter->netdev,
4849                            "RX or TX queues are not allocated, device login failed\n");
4850                 return -ENOMEM;
4851         }
4852
4853         release_login_buffer(adapter);
4854         release_login_rsp_buffer(adapter);
4855
4856         client_data_len = vnic_client_data_len(adapter);
4857
4858         buffer_size =
4859             sizeof(struct ibmvnic_login_buffer) +
4860             sizeof(u64) * (adapter->req_tx_queues + adapter->req_rx_queues) +
4861             client_data_len;
4862
4863         login_buffer = kzalloc(buffer_size, GFP_ATOMIC);
4864         if (!login_buffer)
4865                 goto buf_alloc_failed;
4866
4867         buffer_token = dma_map_single(dev, login_buffer, buffer_size,
4868                                       DMA_TO_DEVICE);
4869         if (dma_mapping_error(dev, buffer_token)) {
4870                 dev_err(dev, "Couldn't map login buffer\n");
4871                 goto buf_map_failed;
4872         }
4873
4874         rsp_buffer_size = sizeof(struct ibmvnic_login_rsp_buffer) +
4875                           sizeof(u64) * adapter->req_tx_queues +
4876                           sizeof(u64) * adapter->req_rx_queues +
4877                           sizeof(u64) * adapter->req_rx_queues +
4878                           sizeof(u8) * IBMVNIC_TX_DESC_VERSIONS;
4879
4880         login_rsp_buffer = kmalloc(rsp_buffer_size, GFP_ATOMIC);
4881         if (!login_rsp_buffer)
4882                 goto buf_rsp_alloc_failed;
4883
4884         rsp_buffer_token = dma_map_single(dev, login_rsp_buffer,
4885                                           rsp_buffer_size, DMA_FROM_DEVICE);
4886         if (dma_mapping_error(dev, rsp_buffer_token)) {
4887                 dev_err(dev, "Couldn't map login rsp buffer\n");
4888                 goto buf_rsp_map_failed;
4889         }
4890
4891         adapter->login_buf = login_buffer;
4892         adapter->login_buf_token = buffer_token;
4893         adapter->login_buf_sz = buffer_size;
4894         adapter->login_rsp_buf = login_rsp_buffer;
4895         adapter->login_rsp_buf_token = rsp_buffer_token;
4896         adapter->login_rsp_buf_sz = rsp_buffer_size;
4897
4898         login_buffer->len = cpu_to_be32(buffer_size);
4899         login_buffer->version = cpu_to_be32(INITIAL_VERSION_LB);
4900         login_buffer->num_txcomp_subcrqs = cpu_to_be32(adapter->req_tx_queues);
4901         login_buffer->off_txcomp_subcrqs =
4902             cpu_to_be32(sizeof(struct ibmvnic_login_buffer));
4903         login_buffer->num_rxcomp_subcrqs = cpu_to_be32(adapter->req_rx_queues);
4904         login_buffer->off_rxcomp_subcrqs =
4905             cpu_to_be32(sizeof(struct ibmvnic_login_buffer) +
4906                         sizeof(u64) * adapter->req_tx_queues);
4907         login_buffer->login_rsp_ioba = cpu_to_be32(rsp_buffer_token);
4908         login_buffer->login_rsp_len = cpu_to_be32(rsp_buffer_size);
4909
4910         tx_list_p = (__be64 *)((char *)login_buffer +
4911                                       sizeof(struct ibmvnic_login_buffer));
4912         rx_list_p = (__be64 *)((char *)login_buffer +
4913                                       sizeof(struct ibmvnic_login_buffer) +
4914                                       sizeof(u64) * adapter->req_tx_queues);
4915
4916         for (i = 0; i < adapter->req_tx_queues; i++) {
4917                 if (adapter->tx_scrq[i]) {
4918                         tx_list_p[i] =
4919                                 cpu_to_be64(adapter->tx_scrq[i]->crq_num);
4920                 }
4921         }
4922
4923         for (i = 0; i < adapter->req_rx_queues; i++) {
4924                 if (adapter->rx_scrq[i]) {
4925                         rx_list_p[i] =
4926                                 cpu_to_be64(adapter->rx_scrq[i]->crq_num);
4927                 }
4928         }
4929
4930         /* Insert vNIC login client data */
4931         vlcd = (struct vnic_login_client_data *)
4932                 ((char *)rx_list_p + (sizeof(u64) * adapter->req_rx_queues));
4933         login_buffer->client_data_offset =
4934                         cpu_to_be32((char *)vlcd - (char *)login_buffer);
4935         login_buffer->client_data_len = cpu_to_be32(client_data_len);
4936
4937         vnic_add_client_data(adapter, vlcd);
4938
4939         netdev_dbg(adapter->netdev, "Login Buffer:\n");
4940         for (i = 0; i < (adapter->login_buf_sz - 1) / 8 + 1; i++) {
4941                 netdev_dbg(adapter->netdev, "%016lx\n",
4942                            ((unsigned long *)(adapter->login_buf))[i]);
4943         }
4944
4945         memset(&crq, 0, sizeof(crq));
4946         crq.login.first = IBMVNIC_CRQ_CMD;
4947         crq.login.cmd = LOGIN;
4948         crq.login.ioba = cpu_to_be32(buffer_token);
4949         crq.login.len = cpu_to_be32(buffer_size);
4950
4951         adapter->login_pending = true;
4952         rc = ibmvnic_send_crq(adapter, &crq);
4953         if (rc) {
4954                 adapter->login_pending = false;
4955                 netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc);
4956                 goto buf_send_failed;
4957         }
4958
4959         return 0;
4960
4961 buf_send_failed:
4962         dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size,
4963                          DMA_FROM_DEVICE);
4964 buf_rsp_map_failed:
4965         kfree(login_rsp_buffer);
4966         adapter->login_rsp_buf = NULL;
4967 buf_rsp_alloc_failed:
4968         dma_unmap_single(dev, buffer_token, buffer_size, DMA_TO_DEVICE);
4969 buf_map_failed:
4970         kfree(login_buffer);
4971         adapter->login_buf = NULL;
4972 buf_alloc_failed:
4973         return -ENOMEM;
4974 }
4975
4976 static int send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
4977                             u32 len, u8 map_id)
4978 {
4979         union ibmvnic_crq crq;
4980
4981         memset(&crq, 0, sizeof(crq));
4982         crq.request_map.first = IBMVNIC_CRQ_CMD;
4983         crq.request_map.cmd = REQUEST_MAP;
4984         crq.request_map.map_id = map_id;
4985         crq.request_map.ioba = cpu_to_be32(addr);
4986         crq.request_map.len = cpu_to_be32(len);
4987         return ibmvnic_send_crq(adapter, &crq);
4988 }
4989
4990 static int send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id)
4991 {
4992         union ibmvnic_crq crq;
4993
4994         memset(&crq, 0, sizeof(crq));
4995         crq.request_unmap.first = IBMVNIC_CRQ_CMD;
4996         crq.request_unmap.cmd = REQUEST_UNMAP;
4997         crq.request_unmap.map_id = map_id;
4998         return ibmvnic_send_crq(adapter, &crq);
4999 }
5000
5001 static void send_query_map(struct ibmvnic_adapter *adapter)
5002 {
5003         union ibmvnic_crq crq;
5004
5005         memset(&crq, 0, sizeof(crq));
5006         crq.query_map.first = IBMVNIC_CRQ_CMD;
5007         crq.query_map.cmd = QUERY_MAP;
5008         ibmvnic_send_crq(adapter, &crq);
5009 }
5010
5011 /* Send a series of CRQs requesting various capabilities of the VNIC server */
5012 static void send_query_cap(struct ibmvnic_adapter *adapter)
5013 {
5014         union ibmvnic_crq crq;
5015         int cap_reqs;
5016
5017         /* We send out 25 QUERY_CAPABILITY CRQs below.  Initialize this count
5018          * upfront. When the tasklet receives a response to all of these, it
5019          * can send out the next protocol messaage (REQUEST_CAPABILITY).
5020          */
5021         cap_reqs = 25;
5022
5023         atomic_set(&adapter->running_cap_crqs, cap_reqs);
5024
5025         memset(&crq, 0, sizeof(crq));
5026         crq.query_capability.first = IBMVNIC_CRQ_CMD;
5027         crq.query_capability.cmd = QUERY_CAPABILITY;
5028
5029         crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES);
5030         ibmvnic_send_crq(adapter, &crq);
5031         cap_reqs--;
5032
5033         crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES);
5034         ibmvnic_send_crq(adapter, &crq);
5035         cap_reqs--;
5036
5037         crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES);
5038         ibmvnic_send_crq(adapter, &crq);
5039         cap_reqs--;
5040
5041         crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES);
5042         ibmvnic_send_crq(adapter, &crq);
5043         cap_reqs--;
5044
5045         crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES);
5046         ibmvnic_send_crq(adapter, &crq);
5047         cap_reqs--;
5048
5049         crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES);
5050         ibmvnic_send_crq(adapter, &crq);
5051         cap_reqs--;
5052
5053         crq.query_capability.capability =
5054             cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ);
5055         ibmvnic_send_crq(adapter, &crq);
5056         cap_reqs--;
5057
5058         crq.query_capability.capability =
5059             cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ);
5060         ibmvnic_send_crq(adapter, &crq);
5061         cap_reqs--;
5062
5063         crq.query_capability.capability =
5064             cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ);
5065         ibmvnic_send_crq(adapter, &crq);
5066         cap_reqs--;
5067
5068         crq.query_capability.capability =
5069             cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ);
5070         ibmvnic_send_crq(adapter, &crq);
5071         cap_reqs--;
5072
5073         crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD);
5074         ibmvnic_send_crq(adapter, &crq);
5075         cap_reqs--;
5076
5077         crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED);
5078         ibmvnic_send_crq(adapter, &crq);
5079         cap_reqs--;
5080
5081         crq.query_capability.capability = cpu_to_be16(MIN_MTU);
5082         ibmvnic_send_crq(adapter, &crq);
5083         cap_reqs--;
5084
5085         crq.query_capability.capability = cpu_to_be16(MAX_MTU);
5086         ibmvnic_send_crq(adapter, &crq);
5087         cap_reqs--;
5088
5089         crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS);
5090         ibmvnic_send_crq(adapter, &crq);
5091         cap_reqs--;
5092
5093         crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION);
5094         ibmvnic_send_crq(adapter, &crq);
5095         cap_reqs--;
5096
5097         crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION);
5098         ibmvnic_send_crq(adapter, &crq);
5099         cap_reqs--;
5100
5101         crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES);
5102         ibmvnic_send_crq(adapter, &crq);
5103         cap_reqs--;
5104
5105         crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED);
5106         ibmvnic_send_crq(adapter, &crq);
5107         cap_reqs--;
5108
5109         crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES);
5110         ibmvnic_send_crq(adapter, &crq);
5111         cap_reqs--;
5112
5113         crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES);
5114         ibmvnic_send_crq(adapter, &crq);
5115         cap_reqs--;
5116
5117         crq.query_capability.capability =
5118                         cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q);
5119         ibmvnic_send_crq(adapter, &crq);
5120         cap_reqs--;
5121
5122         crq.query_capability.capability =
5123                         cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ);
5124         ibmvnic_send_crq(adapter, &crq);
5125         cap_reqs--;
5126
5127         crq.query_capability.capability =
5128                         cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ);
5129         ibmvnic_send_crq(adapter, &crq);
5130         cap_reqs--;
5131
5132         crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ);
5133
5134         ibmvnic_send_crq(adapter, &crq);
5135         cap_reqs--;
5136
5137         /* Keep at end to catch any discrepancy between expected and actual
5138          * CRQs sent.
5139          */
5140         WARN_ON(cap_reqs != 0);
5141 }
5142
5143 static void send_query_ip_offload(struct ibmvnic_adapter *adapter)
5144 {
5145         int buf_sz = sizeof(struct ibmvnic_query_ip_offload_buffer);
5146         struct device *dev = &adapter->vdev->dev;
5147         union ibmvnic_crq crq;
5148
5149         adapter->ip_offload_tok =
5150                 dma_map_single(dev,
5151                                &adapter->ip_offload_buf,
5152                                buf_sz,
5153                                DMA_FROM_DEVICE);
5154
5155         if (dma_mapping_error(dev, adapter->ip_offload_tok)) {
5156                 if (!firmware_has_feature(FW_FEATURE_CMO))
5157                         dev_err(dev, "Couldn't map offload buffer\n");
5158                 return;
5159         }
5160
5161         memset(&crq, 0, sizeof(crq));
5162         crq.query_ip_offload.first = IBMVNIC_CRQ_CMD;
5163         crq.query_ip_offload.cmd = QUERY_IP_OFFLOAD;
5164         crq.query_ip_offload.len = cpu_to_be32(buf_sz);
5165         crq.query_ip_offload.ioba =
5166             cpu_to_be32(adapter->ip_offload_tok);
5167
5168         ibmvnic_send_crq(adapter, &crq);
5169 }
5170
5171 static void send_control_ip_offload(struct ibmvnic_adapter *adapter)
5172 {
5173         struct ibmvnic_control_ip_offload_buffer *ctrl_buf = &adapter->ip_offload_ctrl;
5174         struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf;
5175         struct device *dev = &adapter->vdev->dev;
5176         netdev_features_t old_hw_features = 0;
5177         union ibmvnic_crq crq;
5178
5179         adapter->ip_offload_ctrl_tok =
5180                 dma_map_single(dev,
5181                                ctrl_buf,
5182                                sizeof(adapter->ip_offload_ctrl),
5183                                DMA_TO_DEVICE);
5184
5185         if (dma_mapping_error(dev, adapter->ip_offload_ctrl_tok)) {
5186                 dev_err(dev, "Couldn't map ip offload control buffer\n");
5187                 return;
5188         }
5189
5190         ctrl_buf->len = cpu_to_be32(sizeof(adapter->ip_offload_ctrl));
5191         ctrl_buf->version = cpu_to_be32(INITIAL_VERSION_IOB);
5192         ctrl_buf->ipv4_chksum = buf->ipv4_chksum;
5193         ctrl_buf->ipv6_chksum = buf->ipv6_chksum;
5194         ctrl_buf->tcp_ipv4_chksum = buf->tcp_ipv4_chksum;
5195         ctrl_buf->udp_ipv4_chksum = buf->udp_ipv4_chksum;
5196         ctrl_buf->tcp_ipv6_chksum = buf->tcp_ipv6_chksum;
5197         ctrl_buf->udp_ipv6_chksum = buf->udp_ipv6_chksum;
5198         ctrl_buf->large_tx_ipv4 = buf->large_tx_ipv4;
5199         ctrl_buf->large_tx_ipv6 = buf->large_tx_ipv6;
5200
5201         /* large_rx disabled for now, additional features needed */
5202         ctrl_buf->large_rx_ipv4 = 0;
5203         ctrl_buf->large_rx_ipv6 = 0;
5204
5205         if (adapter->state != VNIC_PROBING) {
5206                 old_hw_features = adapter->netdev->hw_features;
5207                 adapter->netdev->hw_features = 0;
5208         }
5209
5210         adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO;
5211
5212         if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum)
5213                 adapter->netdev->hw_features |= NETIF_F_IP_CSUM;
5214
5215         if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum)
5216                 adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM;
5217
5218         if ((adapter->netdev->features &
5219             (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)))
5220                 adapter->netdev->hw_features |= NETIF_F_RXCSUM;
5221
5222         if (buf->large_tx_ipv4)
5223                 adapter->netdev->hw_features |= NETIF_F_TSO;
5224         if (buf->large_tx_ipv6)
5225                 adapter->netdev->hw_features |= NETIF_F_TSO6;
5226
5227         if (adapter->state == VNIC_PROBING) {
5228                 adapter->netdev->features |= adapter->netdev->hw_features;
5229         } else if (old_hw_features != adapter->netdev->hw_features) {
5230                 netdev_features_t tmp = 0;
5231
5232                 /* disable features no longer supported */
5233                 adapter->netdev->features &= adapter->netdev->hw_features;
5234                 /* turn on features now supported if previously enabled */
5235                 tmp = (old_hw_features ^ adapter->netdev->hw_features) &
5236                         adapter->netdev->hw_features;
5237                 adapter->netdev->features |=
5238                                 tmp & adapter->netdev->wanted_features;
5239         }
5240
5241         memset(&crq, 0, sizeof(crq));
5242         crq.control_ip_offload.first = IBMVNIC_CRQ_CMD;
5243         crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD;
5244         crq.control_ip_offload.len =
5245             cpu_to_be32(sizeof(adapter->ip_offload_ctrl));
5246         crq.control_ip_offload.ioba = cpu_to_be32(adapter->ip_offload_ctrl_tok);
5247         ibmvnic_send_crq(adapter, &crq);
5248 }
5249
5250 static void handle_vpd_size_rsp(union ibmvnic_crq *crq,
5251                                 struct ibmvnic_adapter *adapter)
5252 {
5253         struct device *dev = &adapter->vdev->dev;
5254
5255         if (crq->get_vpd_size_rsp.rc.code) {
5256                 dev_err(dev, "Error retrieving VPD size, rc=%x\n",
5257                         crq->get_vpd_size_rsp.rc.code);
5258                 complete(&adapter->fw_done);
5259                 return;
5260         }
5261
5262         adapter->vpd->len = be64_to_cpu(crq->get_vpd_size_rsp.len);
5263         complete(&adapter->fw_done);
5264 }
5265
5266 static void handle_vpd_rsp(union ibmvnic_crq *crq,
5267                            struct ibmvnic_adapter *adapter)
5268 {
5269         struct device *dev = &adapter->vdev->dev;
5270         unsigned char *substr = NULL;
5271         u8 fw_level_len = 0;
5272
5273         memset(adapter->fw_version, 0, 32);
5274
5275         dma_unmap_single(dev, adapter->vpd->dma_addr, adapter->vpd->len,
5276                          DMA_FROM_DEVICE);
5277
5278         if (crq->get_vpd_rsp.rc.code) {
5279                 dev_err(dev, "Error retrieving VPD from device, rc=%x\n",
5280                         crq->get_vpd_rsp.rc.code);
5281                 goto complete;
5282         }
5283
5284         /* get the position of the firmware version info
5285          * located after the ASCII 'RM' substring in the buffer
5286          */
5287         substr = strnstr(adapter->vpd->buff, "RM", adapter->vpd->len);
5288         if (!substr) {
5289                 dev_info(dev, "Warning - No FW level has been provided in the VPD buffer by the VIOS Server\n");
5290                 goto complete;
5291         }
5292
5293         /* get length of firmware level ASCII substring */
5294         if ((substr + 2) < (adapter->vpd->buff + adapter->vpd->len)) {
5295                 fw_level_len = *(substr + 2);
5296         } else {
5297                 dev_info(dev, "Length of FW substr extrapolated VDP buff\n");
5298                 goto complete;
5299         }
5300
5301         /* copy firmware version string from vpd into adapter */
5302         if ((substr + 3 + fw_level_len) <
5303             (adapter->vpd->buff + adapter->vpd->len)) {
5304                 strscpy(adapter->fw_version, substr + 3,
5305                         sizeof(adapter->fw_version));
5306         } else {
5307                 dev_info(dev, "FW substr extrapolated VPD buff\n");
5308         }
5309
5310 complete:
5311         if (adapter->fw_version[0] == '\0')
5312                 strscpy((char *)adapter->fw_version, "N/A", sizeof(adapter->fw_version));
5313         complete(&adapter->fw_done);
5314 }
5315
5316 static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter)
5317 {
5318         struct device *dev = &adapter->vdev->dev;
5319         struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf;
5320         int i;
5321
5322         dma_unmap_single(dev, adapter->ip_offload_tok,
5323                          sizeof(adapter->ip_offload_buf), DMA_FROM_DEVICE);
5324
5325         netdev_dbg(adapter->netdev, "Query IP Offload Buffer:\n");
5326         for (i = 0; i < (sizeof(adapter->ip_offload_buf) - 1) / 8 + 1; i++)
5327                 netdev_dbg(adapter->netdev, "%016lx\n",
5328                            ((unsigned long *)(buf))[i]);
5329
5330         netdev_dbg(adapter->netdev, "ipv4_chksum = %d\n", buf->ipv4_chksum);
5331         netdev_dbg(adapter->netdev, "ipv6_chksum = %d\n", buf->ipv6_chksum);
5332         netdev_dbg(adapter->netdev, "tcp_ipv4_chksum = %d\n",
5333                    buf->tcp_ipv4_chksum);
5334         netdev_dbg(adapter->netdev, "tcp_ipv6_chksum = %d\n",
5335                    buf->tcp_ipv6_chksum);
5336         netdev_dbg(adapter->netdev, "udp_ipv4_chksum = %d\n",
5337                    buf->udp_ipv4_chksum);
5338         netdev_dbg(adapter->netdev, "udp_ipv6_chksum = %d\n",
5339                    buf->udp_ipv6_chksum);
5340         netdev_dbg(adapter->netdev, "large_tx_ipv4 = %d\n",
5341                    buf->large_tx_ipv4);
5342         netdev_dbg(adapter->netdev, "large_tx_ipv6 = %d\n",
5343                    buf->large_tx_ipv6);
5344         netdev_dbg(adapter->netdev, "large_rx_ipv4 = %d\n",
5345                    buf->large_rx_ipv4);
5346         netdev_dbg(adapter->netdev, "large_rx_ipv6 = %d\n",
5347                    buf->large_rx_ipv6);
5348         netdev_dbg(adapter->netdev, "max_ipv4_hdr_sz = %d\n",
5349                    buf->max_ipv4_header_size);
5350         netdev_dbg(adapter->netdev, "max_ipv6_hdr_sz = %d\n",
5351                    buf->max_ipv6_header_size);
5352         netdev_dbg(adapter->netdev, "max_tcp_hdr_size = %d\n",
5353                    buf->max_tcp_header_size);
5354         netdev_dbg(adapter->netdev, "max_udp_hdr_size = %d\n",
5355                    buf->max_udp_header_size);
5356         netdev_dbg(adapter->netdev, "max_large_tx_size = %d\n",
5357                    buf->max_large_tx_size);
5358         netdev_dbg(adapter->netdev, "max_large_rx_size = %d\n",
5359                    buf->max_large_rx_size);
5360         netdev_dbg(adapter->netdev, "ipv6_ext_hdr = %d\n",
5361                    buf->ipv6_extension_header);
5362         netdev_dbg(adapter->netdev, "tcp_pseudosum_req = %d\n",
5363                    buf->tcp_pseudosum_req);
5364         netdev_dbg(adapter->netdev, "num_ipv6_ext_hd = %d\n",
5365                    buf->num_ipv6_ext_headers);
5366         netdev_dbg(adapter->netdev, "off_ipv6_ext_hd = %d\n",
5367                    buf->off_ipv6_ext_headers);
5368
5369         send_control_ip_offload(adapter);
5370 }
5371
5372 static const char *ibmvnic_fw_err_cause(u16 cause)
5373 {
5374         switch (cause) {
5375         case ADAPTER_PROBLEM:
5376                 return "adapter problem";
5377         case BUS_PROBLEM:
5378                 return "bus problem";
5379         case FW_PROBLEM:
5380                 return "firmware problem";
5381         case DD_PROBLEM:
5382                 return "device driver problem";
5383         case EEH_RECOVERY:
5384                 return "EEH recovery";
5385         case FW_UPDATED:
5386                 return "firmware updated";
5387         case LOW_MEMORY:
5388                 return "low Memory";
5389         default:
5390                 return "unknown";
5391         }
5392 }
5393
5394 static void handle_error_indication(union ibmvnic_crq *crq,
5395                                     struct ibmvnic_adapter *adapter)
5396 {
5397         struct device *dev = &adapter->vdev->dev;
5398         u16 cause;
5399
5400         cause = be16_to_cpu(crq->error_indication.error_cause);
5401
5402         dev_warn_ratelimited(dev,
5403                              "Firmware reports %serror, cause: %s. Starting recovery...\n",
5404                              crq->error_indication.flags
5405                                 & IBMVNIC_FATAL_ERROR ? "FATAL " : "",
5406                              ibmvnic_fw_err_cause(cause));
5407
5408         if (crq->error_indication.flags & IBMVNIC_FATAL_ERROR)
5409                 ibmvnic_reset(adapter, VNIC_RESET_FATAL);
5410         else
5411                 ibmvnic_reset(adapter, VNIC_RESET_NON_FATAL);
5412 }
5413
5414 static int handle_change_mac_rsp(union ibmvnic_crq *crq,
5415                                  struct ibmvnic_adapter *adapter)
5416 {
5417         struct net_device *netdev = adapter->netdev;
5418         struct device *dev = &adapter->vdev->dev;
5419         long rc;
5420
5421         rc = crq->change_mac_addr_rsp.rc.code;
5422         if (rc) {
5423                 dev_err(dev, "Error %ld in CHANGE_MAC_ADDR_RSP\n", rc);
5424                 goto out;
5425         }
5426         /* crq->change_mac_addr.mac_addr is the requested one
5427          * crq->change_mac_addr_rsp.mac_addr is the returned valid one.
5428          */
5429         eth_hw_addr_set(netdev, &crq->change_mac_addr_rsp.mac_addr[0]);
5430         ether_addr_copy(adapter->mac_addr,
5431                         &crq->change_mac_addr_rsp.mac_addr[0]);
5432 out:
5433         complete(&adapter->fw_done);
5434         return rc;
5435 }
5436
5437 static void handle_request_cap_rsp(union ibmvnic_crq *crq,
5438                                    struct ibmvnic_adapter *adapter)
5439 {
5440         struct device *dev = &adapter->vdev->dev;
5441         u64 *req_value;
5442         char *name;
5443
5444         atomic_dec(&adapter->running_cap_crqs);
5445         netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n",
5446                    atomic_read(&adapter->running_cap_crqs));
5447         switch (be16_to_cpu(crq->request_capability_rsp.capability)) {
5448         case REQ_TX_QUEUES:
5449                 req_value = &adapter->req_tx_queues;
5450                 name = "tx";
5451                 break;
5452         case REQ_RX_QUEUES:
5453                 req_value = &adapter->req_rx_queues;
5454                 name = "rx";
5455                 break;
5456         case REQ_RX_ADD_QUEUES:
5457                 req_value = &adapter->req_rx_add_queues;
5458                 name = "rx_add";
5459                 break;
5460         case REQ_TX_ENTRIES_PER_SUBCRQ:
5461                 req_value = &adapter->req_tx_entries_per_subcrq;
5462                 name = "tx_entries_per_subcrq";
5463                 break;
5464         case REQ_RX_ADD_ENTRIES_PER_SUBCRQ:
5465                 req_value = &adapter->req_rx_add_entries_per_subcrq;
5466                 name = "rx_add_entries_per_subcrq";
5467                 break;
5468         case REQ_MTU:
5469                 req_value = &adapter->req_mtu;
5470                 name = "mtu";
5471                 break;
5472         case PROMISC_REQUESTED:
5473                 req_value = &adapter->promisc;
5474                 name = "promisc";
5475                 break;
5476         default:
5477                 dev_err(dev, "Got invalid cap request rsp %d\n",
5478                         crq->request_capability.capability);
5479                 return;
5480         }
5481
5482         switch (crq->request_capability_rsp.rc.code) {
5483         case SUCCESS:
5484                 break;
5485         case PARTIALSUCCESS:
5486                 dev_info(dev, "req=%lld, rsp=%ld in %s queue, retrying.\n",
5487                          *req_value,
5488                          (long)be64_to_cpu(crq->request_capability_rsp.number),
5489                          name);
5490
5491                 if (be16_to_cpu(crq->request_capability_rsp.capability) ==
5492                     REQ_MTU) {
5493                         pr_err("mtu of %llu is not supported. Reverting.\n",
5494                                *req_value);
5495                         *req_value = adapter->fallback.mtu;
5496                 } else {
5497                         *req_value =
5498                                 be64_to_cpu(crq->request_capability_rsp.number);
5499                 }
5500
5501                 send_request_cap(adapter, 1);
5502                 return;
5503         default:
5504                 dev_err(dev, "Error %d in request cap rsp\n",
5505                         crq->request_capability_rsp.rc.code);
5506                 return;
5507         }
5508
5509         /* Done receiving requested capabilities, query IP offload support */
5510         if (atomic_read(&adapter->running_cap_crqs) == 0)
5511                 send_query_ip_offload(adapter);
5512 }
5513
5514 static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
5515                             struct ibmvnic_adapter *adapter)
5516 {
5517         struct device *dev = &adapter->vdev->dev;
5518         struct net_device *netdev = adapter->netdev;
5519         struct ibmvnic_login_rsp_buffer *login_rsp = adapter->login_rsp_buf;
5520         struct ibmvnic_login_buffer *login = adapter->login_buf;
5521         u64 *tx_handle_array;
5522         u64 *rx_handle_array;
5523         int num_tx_pools;
5524         int num_rx_pools;
5525         u64 *size_array;
5526         u32 rsp_len;
5527         int i;
5528
5529         /* CHECK: Test/set of login_pending does not need to be atomic
5530          * because only ibmvnic_tasklet tests/clears this.
5531          */
5532         if (!adapter->login_pending) {
5533                 netdev_warn(netdev, "Ignoring unexpected login response\n");
5534                 return 0;
5535         }
5536         adapter->login_pending = false;
5537
5538         /* If the number of queues requested can't be allocated by the
5539          * server, the login response will return with code 1. We will need
5540          * to resend the login buffer with fewer queues requested.
5541          */
5542         if (login_rsp_crq->generic.rc.code) {
5543                 adapter->init_done_rc = login_rsp_crq->generic.rc.code;
5544                 complete(&adapter->init_done);
5545                 return 0;
5546         }
5547
5548         if (adapter->failover_pending) {
5549                 adapter->init_done_rc = -EAGAIN;
5550                 netdev_dbg(netdev, "Failover pending, ignoring login response\n");
5551                 complete(&adapter->init_done);
5552                 /* login response buffer will be released on reset */
5553                 return 0;
5554         }
5555
5556         netdev->mtu = adapter->req_mtu - ETH_HLEN;
5557
5558         netdev_dbg(adapter->netdev, "Login Response Buffer:\n");
5559         for (i = 0; i < (adapter->login_rsp_buf_sz - 1) / 8 + 1; i++) {
5560                 netdev_dbg(adapter->netdev, "%016lx\n",
5561                            ((unsigned long *)(adapter->login_rsp_buf))[i]);
5562         }
5563
5564         /* Sanity checks */
5565         if (login->num_txcomp_subcrqs != login_rsp->num_txsubm_subcrqs ||
5566             (be32_to_cpu(login->num_rxcomp_subcrqs) *
5567              adapter->req_rx_add_queues !=
5568              be32_to_cpu(login_rsp->num_rxadd_subcrqs))) {
5569                 dev_err(dev, "FATAL: Inconsistent login and login rsp\n");
5570                 ibmvnic_reset(adapter, VNIC_RESET_FATAL);
5571                 return -EIO;
5572         }
5573
5574         rsp_len = be32_to_cpu(login_rsp->len);
5575         if (be32_to_cpu(login->login_rsp_len) < rsp_len ||
5576             rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) ||
5577             rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) ||
5578             rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) ||
5579             rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) {
5580                 /* This can happen if a login request times out and there are
5581                  * 2 outstanding login requests sent, the LOGIN_RSP crq
5582                  * could have been for the older login request. So we are
5583                  * parsing the newer response buffer which may be incomplete
5584                  */
5585                 dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n");
5586                 ibmvnic_reset(adapter, VNIC_RESET_FATAL);
5587                 return -EIO;
5588         }
5589
5590         size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
5591                 be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
5592         /* variable buffer sizes are not supported, so just read the
5593          * first entry.
5594          */
5595         adapter->cur_rx_buf_sz = be64_to_cpu(size_array[0]);
5596
5597         num_tx_pools = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
5598         num_rx_pools = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
5599
5600         tx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
5601                                   be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs));
5602         rx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
5603                                   be32_to_cpu(adapter->login_rsp_buf->off_rxadd_subcrqs));
5604
5605         for (i = 0; i < num_tx_pools; i++)
5606                 adapter->tx_scrq[i]->handle = tx_handle_array[i];
5607
5608         for (i = 0; i < num_rx_pools; i++)
5609                 adapter->rx_scrq[i]->handle = rx_handle_array[i];
5610
5611         adapter->num_active_tx_scrqs = num_tx_pools;
5612         adapter->num_active_rx_scrqs = num_rx_pools;
5613         release_login_rsp_buffer(adapter);
5614         release_login_buffer(adapter);
5615         complete(&adapter->init_done);
5616
5617         return 0;
5618 }
5619
5620 static void handle_request_unmap_rsp(union ibmvnic_crq *crq,
5621                                      struct ibmvnic_adapter *adapter)
5622 {
5623         struct device *dev = &adapter->vdev->dev;
5624         long rc;
5625
5626         rc = crq->request_unmap_rsp.rc.code;
5627         if (rc)
5628                 dev_err(dev, "Error %ld in REQUEST_UNMAP_RSP\n", rc);
5629 }
5630
5631 static void handle_query_map_rsp(union ibmvnic_crq *crq,
5632                                  struct ibmvnic_adapter *adapter)
5633 {
5634         struct net_device *netdev = adapter->netdev;
5635         struct device *dev = &adapter->vdev->dev;
5636         long rc;
5637
5638         rc = crq->query_map_rsp.rc.code;
5639         if (rc) {
5640                 dev_err(dev, "Error %ld in QUERY_MAP_RSP\n", rc);
5641                 return;
5642         }
5643         netdev_dbg(netdev, "page_size = %d\ntot_pages = %u\nfree_pages = %u\n",
5644                    crq->query_map_rsp.page_size,
5645                    __be32_to_cpu(crq->query_map_rsp.tot_pages),
5646                    __be32_to_cpu(crq->query_map_rsp.free_pages));
5647 }
5648
5649 static void handle_query_cap_rsp(union ibmvnic_crq *crq,
5650                                  struct ibmvnic_adapter *adapter)
5651 {
5652         struct net_device *netdev = adapter->netdev;
5653         struct device *dev = &adapter->vdev->dev;
5654         long rc;
5655
5656         atomic_dec(&adapter->running_cap_crqs);
5657         netdev_dbg(netdev, "Outstanding queries: %d\n",
5658                    atomic_read(&adapter->running_cap_crqs));
5659         rc = crq->query_capability.rc.code;
5660         if (rc) {
5661                 dev_err(dev, "Error %ld in QUERY_CAP_RSP\n", rc);
5662                 goto out;
5663         }
5664
5665         switch (be16_to_cpu(crq->query_capability.capability)) {
5666         case MIN_TX_QUEUES:
5667                 adapter->min_tx_queues =
5668                     be64_to_cpu(crq->query_capability.number);
5669                 netdev_dbg(netdev, "min_tx_queues = %lld\n",
5670                            adapter->min_tx_queues);
5671                 break;
5672         case MIN_RX_QUEUES:
5673                 adapter->min_rx_queues =
5674                     be64_to_cpu(crq->query_capability.number);
5675                 netdev_dbg(netdev, "min_rx_queues = %lld\n",
5676                            adapter->min_rx_queues);
5677                 break;
5678         case MIN_RX_ADD_QUEUES:
5679                 adapter->min_rx_add_queues =
5680                     be64_to_cpu(crq->query_capability.number);
5681                 netdev_dbg(netdev, "min_rx_add_queues = %lld\n",
5682                            adapter->min_rx_add_queues);
5683                 break;
5684         case MAX_TX_QUEUES:
5685                 adapter->max_tx_queues =
5686                     be64_to_cpu(crq->query_capability.number);
5687                 netdev_dbg(netdev, "max_tx_queues = %lld\n",
5688                            adapter->max_tx_queues);
5689                 break;
5690         case MAX_RX_QUEUES:
5691                 adapter->max_rx_queues =
5692                     be64_to_cpu(crq->query_capability.number);
5693                 netdev_dbg(netdev, "max_rx_queues = %lld\n",
5694                            adapter->max_rx_queues);
5695                 break;
5696         case MAX_RX_ADD_QUEUES:
5697                 adapter->max_rx_add_queues =
5698                     be64_to_cpu(crq->query_capability.number);
5699                 netdev_dbg(netdev, "max_rx_add_queues = %lld\n",
5700                            adapter->max_rx_add_queues);
5701                 break;
5702         case MIN_TX_ENTRIES_PER_SUBCRQ:
5703                 adapter->min_tx_entries_per_subcrq =
5704                     be64_to_cpu(crq->query_capability.number);
5705                 netdev_dbg(netdev, "min_tx_entries_per_subcrq = %lld\n",
5706                            adapter->min_tx_entries_per_subcrq);
5707                 break;
5708         case MIN_RX_ADD_ENTRIES_PER_SUBCRQ:
5709                 adapter->min_rx_add_entries_per_subcrq =
5710                     be64_to_cpu(crq->query_capability.number);
5711                 netdev_dbg(netdev, "min_rx_add_entrs_per_subcrq = %lld\n",
5712                            adapter->min_rx_add_entries_per_subcrq);
5713                 break;
5714         case MAX_TX_ENTRIES_PER_SUBCRQ:
5715                 adapter->max_tx_entries_per_subcrq =
5716                     be64_to_cpu(crq->query_capability.number);
5717                 netdev_dbg(netdev, "max_tx_entries_per_subcrq = %lld\n",
5718                            adapter->max_tx_entries_per_subcrq);
5719                 break;
5720         case MAX_RX_ADD_ENTRIES_PER_SUBCRQ:
5721                 adapter->max_rx_add_entries_per_subcrq =
5722                     be64_to_cpu(crq->query_capability.number);
5723                 netdev_dbg(netdev, "max_rx_add_entrs_per_subcrq = %lld\n",
5724                            adapter->max_rx_add_entries_per_subcrq);
5725                 break;
5726         case TCP_IP_OFFLOAD:
5727                 adapter->tcp_ip_offload =
5728                     be64_to_cpu(crq->query_capability.number);
5729                 netdev_dbg(netdev, "tcp_ip_offload = %lld\n",
5730                            adapter->tcp_ip_offload);
5731                 break;
5732         case PROMISC_SUPPORTED:
5733                 adapter->promisc_supported =
5734                     be64_to_cpu(crq->query_capability.number);
5735                 netdev_dbg(netdev, "promisc_supported = %lld\n",
5736                            adapter->promisc_supported);
5737                 break;
5738         case MIN_MTU:
5739                 adapter->min_mtu = be64_to_cpu(crq->query_capability.number);
5740                 netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
5741                 netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu);
5742                 break;
5743         case MAX_MTU:
5744                 adapter->max_mtu = be64_to_cpu(crq->query_capability.number);
5745                 netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
5746                 netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu);
5747                 break;
5748         case MAX_MULTICAST_FILTERS:
5749                 adapter->max_multicast_filters =
5750                     be64_to_cpu(crq->query_capability.number);
5751                 netdev_dbg(netdev, "max_multicast_filters = %lld\n",
5752                            adapter->max_multicast_filters);
5753                 break;
5754         case VLAN_HEADER_INSERTION:
5755                 adapter->vlan_header_insertion =
5756                     be64_to_cpu(crq->query_capability.number);
5757                 if (adapter->vlan_header_insertion)
5758                         netdev->features |= NETIF_F_HW_VLAN_STAG_TX;
5759                 netdev_dbg(netdev, "vlan_header_insertion = %lld\n",
5760                            adapter->vlan_header_insertion);
5761                 break;
5762         case RX_VLAN_HEADER_INSERTION:
5763                 adapter->rx_vlan_header_insertion =
5764                     be64_to_cpu(crq->query_capability.number);
5765                 netdev_dbg(netdev, "rx_vlan_header_insertion = %lld\n",
5766                            adapter->rx_vlan_header_insertion);
5767                 break;
5768         case MAX_TX_SG_ENTRIES:
5769                 adapter->max_tx_sg_entries =
5770                     be64_to_cpu(crq->query_capability.number);
5771                 netdev_dbg(netdev, "max_tx_sg_entries = %lld\n",
5772                            adapter->max_tx_sg_entries);
5773                 break;
5774         case RX_SG_SUPPORTED:
5775                 adapter->rx_sg_supported =
5776                     be64_to_cpu(crq->query_capability.number);
5777                 netdev_dbg(netdev, "rx_sg_supported = %lld\n",
5778                            adapter->rx_sg_supported);
5779                 break;
5780         case OPT_TX_COMP_SUB_QUEUES:
5781                 adapter->opt_tx_comp_sub_queues =
5782                     be64_to_cpu(crq->query_capability.number);
5783                 netdev_dbg(netdev, "opt_tx_comp_sub_queues = %lld\n",
5784                            adapter->opt_tx_comp_sub_queues);
5785                 break;
5786         case OPT_RX_COMP_QUEUES:
5787                 adapter->opt_rx_comp_queues =
5788                     be64_to_cpu(crq->query_capability.number);
5789                 netdev_dbg(netdev, "opt_rx_comp_queues = %lld\n",
5790                            adapter->opt_rx_comp_queues);
5791                 break;
5792         case OPT_RX_BUFADD_Q_PER_RX_COMP_Q:
5793                 adapter->opt_rx_bufadd_q_per_rx_comp_q =
5794                     be64_to_cpu(crq->query_capability.number);
5795                 netdev_dbg(netdev, "opt_rx_bufadd_q_per_rx_comp_q = %lld\n",
5796                            adapter->opt_rx_bufadd_q_per_rx_comp_q);
5797                 break;
5798         case OPT_TX_ENTRIES_PER_SUBCRQ:
5799                 adapter->opt_tx_entries_per_subcrq =
5800                     be64_to_cpu(crq->query_capability.number);
5801                 netdev_dbg(netdev, "opt_tx_entries_per_subcrq = %lld\n",
5802                            adapter->opt_tx_entries_per_subcrq);
5803                 break;
5804         case OPT_RXBA_ENTRIES_PER_SUBCRQ:
5805                 adapter->opt_rxba_entries_per_subcrq =
5806                     be64_to_cpu(crq->query_capability.number);
5807                 netdev_dbg(netdev, "opt_rxba_entries_per_subcrq = %lld\n",
5808                            adapter->opt_rxba_entries_per_subcrq);
5809                 break;
5810         case TX_RX_DESC_REQ:
5811                 adapter->tx_rx_desc_req = crq->query_capability.number;
5812                 netdev_dbg(netdev, "tx_rx_desc_req = %llx\n",
5813                            adapter->tx_rx_desc_req);
5814                 break;
5815
5816         default:
5817                 netdev_err(netdev, "Got invalid cap rsp %d\n",
5818                            crq->query_capability.capability);
5819         }
5820
5821 out:
5822         if (atomic_read(&adapter->running_cap_crqs) == 0)
5823                 send_request_cap(adapter, 0);
5824 }
5825
5826 static int send_query_phys_parms(struct ibmvnic_adapter *adapter)
5827 {
5828         union ibmvnic_crq crq;
5829         int rc;
5830
5831         memset(&crq, 0, sizeof(crq));
5832         crq.query_phys_parms.first = IBMVNIC_CRQ_CMD;
5833         crq.query_phys_parms.cmd = QUERY_PHYS_PARMS;
5834
5835         mutex_lock(&adapter->fw_lock);
5836         adapter->fw_done_rc = 0;
5837         reinit_completion(&adapter->fw_done);
5838
5839         rc = ibmvnic_send_crq(adapter, &crq);
5840         if (rc) {
5841                 mutex_unlock(&adapter->fw_lock);
5842                 return rc;
5843         }
5844
5845         rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
5846         if (rc) {
5847                 mutex_unlock(&adapter->fw_lock);
5848                 return rc;
5849         }
5850
5851         mutex_unlock(&adapter->fw_lock);
5852         return adapter->fw_done_rc ? -EIO : 0;
5853 }
5854
5855 static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq,
5856                                        struct ibmvnic_adapter *adapter)
5857 {
5858         struct net_device *netdev = adapter->netdev;
5859         int rc;
5860         __be32 rspeed = cpu_to_be32(crq->query_phys_parms_rsp.speed);
5861
5862         rc = crq->query_phys_parms_rsp.rc.code;
5863         if (rc) {
5864                 netdev_err(netdev, "Error %d in QUERY_PHYS_PARMS\n", rc);
5865                 return rc;
5866         }
5867         switch (rspeed) {
5868         case IBMVNIC_10MBPS:
5869                 adapter->speed = SPEED_10;
5870                 break;
5871         case IBMVNIC_100MBPS:
5872                 adapter->speed = SPEED_100;
5873                 break;
5874         case IBMVNIC_1GBPS:
5875                 adapter->speed = SPEED_1000;
5876                 break;
5877         case IBMVNIC_10GBPS:
5878                 adapter->speed = SPEED_10000;
5879                 break;
5880         case IBMVNIC_25GBPS:
5881                 adapter->speed = SPEED_25000;
5882                 break;
5883         case IBMVNIC_40GBPS:
5884                 adapter->speed = SPEED_40000;
5885                 break;
5886         case IBMVNIC_50GBPS:
5887                 adapter->speed = SPEED_50000;
5888                 break;
5889         case IBMVNIC_100GBPS:
5890                 adapter->speed = SPEED_100000;
5891                 break;
5892         case IBMVNIC_200GBPS:
5893                 adapter->speed = SPEED_200000;
5894                 break;
5895         default:
5896                 if (netif_carrier_ok(netdev))
5897                         netdev_warn(netdev, "Unknown speed 0x%08x\n", rspeed);
5898                 adapter->speed = SPEED_UNKNOWN;
5899         }
5900         if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_FULL_DUPLEX)
5901                 adapter->duplex = DUPLEX_FULL;
5902         else if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_HALF_DUPLEX)
5903                 adapter->duplex = DUPLEX_HALF;
5904         else
5905                 adapter->duplex = DUPLEX_UNKNOWN;
5906
5907         return rc;
5908 }
5909
5910 static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
5911                                struct ibmvnic_adapter *adapter)
5912 {
5913         struct ibmvnic_generic_crq *gen_crq = &crq->generic;
5914         struct net_device *netdev = adapter->netdev;
5915         struct device *dev = &adapter->vdev->dev;
5916         u64 *u64_crq = (u64 *)crq;
5917         long rc;
5918
5919         netdev_dbg(netdev, "Handling CRQ: %016lx %016lx\n",
5920                    (unsigned long)cpu_to_be64(u64_crq[0]),
5921                    (unsigned long)cpu_to_be64(u64_crq[1]));
5922         switch (gen_crq->first) {
5923         case IBMVNIC_CRQ_INIT_RSP:
5924                 switch (gen_crq->cmd) {
5925                 case IBMVNIC_CRQ_INIT:
5926                         dev_info(dev, "Partner initialized\n");
5927                         adapter->from_passive_init = true;
5928                         /* Discard any stale login responses from prev reset.
5929                          * CHECK: should we clear even on INIT_COMPLETE?
5930                          */
5931                         adapter->login_pending = false;
5932
5933                         if (adapter->state == VNIC_DOWN)
5934                                 rc = ibmvnic_reset(adapter, VNIC_RESET_PASSIVE_INIT);
5935                         else
5936                                 rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
5937
5938                         if (rc && rc != -EBUSY) {
5939                                 /* We were unable to schedule the failover
5940                                  * reset either because the adapter was still
5941                                  * probing (eg: during kexec) or we could not
5942                                  * allocate memory. Clear the failover_pending
5943                                  * flag since no one else will. We ignore
5944                                  * EBUSY because it means either FAILOVER reset
5945                                  * is already scheduled or the adapter is
5946                                  * being removed.
5947                                  */
5948                                 netdev_err(netdev,
5949                                            "Error %ld scheduling failover reset\n",
5950                                            rc);
5951                                 adapter->failover_pending = false;
5952                         }
5953
5954                         if (!completion_done(&adapter->init_done)) {
5955                                 if (!adapter->init_done_rc)
5956                                         adapter->init_done_rc = -EAGAIN;
5957                                 complete(&adapter->init_done);
5958                         }
5959
5960                         break;
5961                 case IBMVNIC_CRQ_INIT_COMPLETE:
5962                         dev_info(dev, "Partner initialization complete\n");
5963                         adapter->crq.active = true;
5964                         send_version_xchg(adapter);
5965                         break;
5966                 default:
5967                         dev_err(dev, "Unknown crq cmd: %d\n", gen_crq->cmd);
5968                 }
5969                 return;
5970         case IBMVNIC_CRQ_XPORT_EVENT:
5971                 netif_carrier_off(netdev);
5972                 adapter->crq.active = false;
5973                 /* terminate any thread waiting for a response
5974                  * from the device
5975                  */
5976                 if (!completion_done(&adapter->fw_done)) {
5977                         adapter->fw_done_rc = -EIO;
5978                         complete(&adapter->fw_done);
5979                 }
5980
5981                 /* if we got here during crq-init, retry crq-init */
5982                 if (!completion_done(&adapter->init_done)) {
5983                         adapter->init_done_rc = -EAGAIN;
5984                         complete(&adapter->init_done);
5985                 }
5986
5987                 if (!completion_done(&adapter->stats_done))
5988                         complete(&adapter->stats_done);
5989                 if (test_bit(0, &adapter->resetting))
5990                         adapter->force_reset_recovery = true;
5991                 if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
5992                         dev_info(dev, "Migrated, re-enabling adapter\n");
5993                         ibmvnic_reset(adapter, VNIC_RESET_MOBILITY);
5994                 } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) {
5995                         dev_info(dev, "Backing device failover detected\n");
5996                         adapter->failover_pending = true;
5997                 } else {
5998                         /* The adapter lost the connection */
5999                         dev_err(dev, "Virtual Adapter failed (rc=%d)\n",
6000                                 gen_crq->cmd);
6001                         ibmvnic_reset(adapter, VNIC_RESET_FATAL);
6002                 }
6003                 return;
6004         case IBMVNIC_CRQ_CMD_RSP:
6005                 break;
6006         default:
6007                 dev_err(dev, "Got an invalid msg type 0x%02x\n",
6008                         gen_crq->first);
6009                 return;
6010         }
6011
6012         switch (gen_crq->cmd) {
6013         case VERSION_EXCHANGE_RSP:
6014                 rc = crq->version_exchange_rsp.rc.code;
6015                 if (rc) {
6016                         dev_err(dev, "Error %ld in VERSION_EXCHG_RSP\n", rc);
6017                         break;
6018                 }
6019                 ibmvnic_version =
6020                             be16_to_cpu(crq->version_exchange_rsp.version);
6021                 dev_info(dev, "Partner protocol version is %d\n",
6022                          ibmvnic_version);
6023                 send_query_cap(adapter);
6024                 break;
6025         case QUERY_CAPABILITY_RSP:
6026                 handle_query_cap_rsp(crq, adapter);
6027                 break;
6028         case QUERY_MAP_RSP:
6029                 handle_query_map_rsp(crq, adapter);
6030                 break;
6031         case REQUEST_MAP_RSP:
6032                 adapter->fw_done_rc = crq->request_map_rsp.rc.code;
6033                 complete(&adapter->fw_done);
6034                 break;
6035         case REQUEST_UNMAP_RSP:
6036                 handle_request_unmap_rsp(crq, adapter);
6037                 break;
6038         case REQUEST_CAPABILITY_RSP:
6039                 handle_request_cap_rsp(crq, adapter);
6040                 break;
6041         case LOGIN_RSP:
6042                 netdev_dbg(netdev, "Got Login Response\n");
6043                 handle_login_rsp(crq, adapter);
6044                 break;
6045         case LOGICAL_LINK_STATE_RSP:
6046                 netdev_dbg(netdev,
6047                            "Got Logical Link State Response, state: %d rc: %d\n",
6048                            crq->logical_link_state_rsp.link_state,
6049                            crq->logical_link_state_rsp.rc.code);
6050                 adapter->logical_link_state =
6051                     crq->logical_link_state_rsp.link_state;
6052                 adapter->init_done_rc = crq->logical_link_state_rsp.rc.code;
6053                 complete(&adapter->init_done);
6054                 break;
6055         case LINK_STATE_INDICATION:
6056                 netdev_dbg(netdev, "Got Logical Link State Indication\n");
6057                 adapter->phys_link_state =
6058                     crq->link_state_indication.phys_link_state;
6059                 adapter->logical_link_state =
6060                     crq->link_state_indication.logical_link_state;
6061                 if (adapter->phys_link_state && adapter->logical_link_state)
6062                         netif_carrier_on(netdev);
6063                 else
6064                         netif_carrier_off(netdev);
6065                 break;
6066         case CHANGE_MAC_ADDR_RSP:
6067                 netdev_dbg(netdev, "Got MAC address change Response\n");
6068                 adapter->fw_done_rc = handle_change_mac_rsp(crq, adapter);
6069                 break;
6070         case ERROR_INDICATION:
6071                 netdev_dbg(netdev, "Got Error Indication\n");
6072                 handle_error_indication(crq, adapter);
6073                 break;
6074         case REQUEST_STATISTICS_RSP:
6075                 netdev_dbg(netdev, "Got Statistics Response\n");
6076                 complete(&adapter->stats_done);
6077                 break;
6078         case QUERY_IP_OFFLOAD_RSP:
6079                 netdev_dbg(netdev, "Got Query IP offload Response\n");
6080                 handle_query_ip_offload_rsp(adapter);
6081                 break;
6082         case MULTICAST_CTRL_RSP:
6083                 netdev_dbg(netdev, "Got multicast control Response\n");
6084                 break;
6085         case CONTROL_IP_OFFLOAD_RSP:
6086                 netdev_dbg(netdev, "Got Control IP offload Response\n");
6087                 dma_unmap_single(dev, adapter->ip_offload_ctrl_tok,
6088                                  sizeof(adapter->ip_offload_ctrl),
6089                                  DMA_TO_DEVICE);
6090                 complete(&adapter->init_done);
6091                 break;
6092         case COLLECT_FW_TRACE_RSP:
6093                 netdev_dbg(netdev, "Got Collect firmware trace Response\n");
6094                 complete(&adapter->fw_done);
6095                 break;
6096         case GET_VPD_SIZE_RSP:
6097                 handle_vpd_size_rsp(crq, adapter);
6098                 break;
6099         case GET_VPD_RSP:
6100                 handle_vpd_rsp(crq, adapter);
6101                 break;
6102         case QUERY_PHYS_PARMS_RSP:
6103                 adapter->fw_done_rc = handle_query_phys_parms_rsp(crq, adapter);
6104                 complete(&adapter->fw_done);
6105                 break;
6106         default:
6107                 netdev_err(netdev, "Got an invalid cmd type 0x%02x\n",
6108                            gen_crq->cmd);
6109         }
6110 }
6111
6112 static irqreturn_t ibmvnic_interrupt(int irq, void *instance)
6113 {
6114         struct ibmvnic_adapter *adapter = instance;
6115
6116         tasklet_schedule(&adapter->tasklet);
6117         return IRQ_HANDLED;
6118 }
6119
6120 static void ibmvnic_tasklet(struct tasklet_struct *t)
6121 {
6122         struct ibmvnic_adapter *adapter = from_tasklet(adapter, t, tasklet);
6123         struct ibmvnic_crq_queue *queue = &adapter->crq;
6124         union ibmvnic_crq *crq;
6125         unsigned long flags;
6126
6127         spin_lock_irqsave(&queue->lock, flags);
6128
6129         /* Pull all the valid messages off the CRQ */
6130         while ((crq = ibmvnic_next_crq(adapter)) != NULL) {
6131                 /* This barrier makes sure ibmvnic_next_crq()'s
6132                  * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded
6133                  * before ibmvnic_handle_crq()'s
6134                  * switch(gen_crq->first) and switch(gen_crq->cmd).
6135                  */
6136                 dma_rmb();
6137                 ibmvnic_handle_crq(crq, adapter);
6138                 crq->generic.first = 0;
6139         }
6140
6141         spin_unlock_irqrestore(&queue->lock, flags);
6142 }
6143
6144 static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *adapter)
6145 {
6146         struct vio_dev *vdev = adapter->vdev;
6147         int rc;
6148
6149         do {
6150                 rc = plpar_hcall_norets(H_ENABLE_CRQ, vdev->unit_address);
6151         } while (rc == H_IN_PROGRESS || rc == H_BUSY || H_IS_LONG_BUSY(rc));
6152
6153         if (rc)
6154                 dev_err(&vdev->dev, "Error enabling adapter (rc=%d)\n", rc);
6155
6156         return rc;
6157 }
6158
6159 static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter)
6160 {
6161         struct ibmvnic_crq_queue *crq = &adapter->crq;
6162         struct device *dev = &adapter->vdev->dev;
6163         struct vio_dev *vdev = adapter->vdev;
6164         int rc;
6165
6166         /* Close the CRQ */
6167         do {
6168                 rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
6169         } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
6170
6171         /* Clean out the queue */
6172         if (!crq->msgs)
6173                 return -EINVAL;
6174
6175         memset(crq->msgs, 0, PAGE_SIZE);
6176         crq->cur = 0;
6177         crq->active = false;
6178
6179         /* And re-open it again */
6180         rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
6181                                 crq->msg_token, PAGE_SIZE);
6182
6183         if (rc == H_CLOSED)
6184                 /* Adapter is good, but other end is not ready */
6185                 dev_warn(dev, "Partner adapter not ready\n");
6186         else if (rc != 0)
6187                 dev_warn(dev, "Couldn't register crq (rc=%d)\n", rc);
6188
6189         return rc;
6190 }
6191
6192 static void release_crq_queue(struct ibmvnic_adapter *adapter)
6193 {
6194         struct ibmvnic_crq_queue *crq = &adapter->crq;
6195         struct vio_dev *vdev = adapter->vdev;
6196         long rc;
6197
6198         if (!crq->msgs)
6199                 return;
6200
6201         netdev_dbg(adapter->netdev, "Releasing CRQ\n");
6202         free_irq(vdev->irq, adapter);
6203         tasklet_kill(&adapter->tasklet);
6204         do {
6205                 rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
6206         } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
6207
6208         dma_unmap_single(&vdev->dev, crq->msg_token, PAGE_SIZE,
6209                          DMA_BIDIRECTIONAL);
6210         free_page((unsigned long)crq->msgs);
6211         crq->msgs = NULL;
6212         crq->active = false;
6213 }
6214
6215 static int init_crq_queue(struct ibmvnic_adapter *adapter)
6216 {
6217         struct ibmvnic_crq_queue *crq = &adapter->crq;
6218         struct device *dev = &adapter->vdev->dev;
6219         struct vio_dev *vdev = adapter->vdev;
6220         int rc, retrc = -ENOMEM;
6221
6222         if (crq->msgs)
6223                 return 0;
6224
6225         crq->msgs = (union ibmvnic_crq *)get_zeroed_page(GFP_KERNEL);
6226         /* Should we allocate more than one page? */
6227
6228         if (!crq->msgs)
6229                 return -ENOMEM;
6230
6231         crq->size = PAGE_SIZE / sizeof(*crq->msgs);
6232         crq->msg_token = dma_map_single(dev, crq->msgs, PAGE_SIZE,
6233                                         DMA_BIDIRECTIONAL);
6234         if (dma_mapping_error(dev, crq->msg_token))
6235                 goto map_failed;
6236
6237         rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
6238                                 crq->msg_token, PAGE_SIZE);
6239
6240         if (rc == H_RESOURCE)
6241                 /* maybe kexecing and resource is busy. try a reset */
6242                 rc = ibmvnic_reset_crq(adapter);
6243         retrc = rc;
6244
6245         if (rc == H_CLOSED) {
6246                 dev_warn(dev, "Partner adapter not ready\n");
6247         } else if (rc) {
6248                 dev_warn(dev, "Error %d opening adapter\n", rc);
6249                 goto reg_crq_failed;
6250         }
6251
6252         retrc = 0;
6253
6254         tasklet_setup(&adapter->tasklet, (void *)ibmvnic_tasklet);
6255
6256         netdev_dbg(adapter->netdev, "registering irq 0x%x\n", vdev->irq);
6257         snprintf(crq->name, sizeof(crq->name), "ibmvnic-%x",
6258                  adapter->vdev->unit_address);
6259         rc = request_irq(vdev->irq, ibmvnic_interrupt, 0, crq->name, adapter);
6260         if (rc) {
6261                 dev_err(dev, "Couldn't register irq 0x%x. rc=%d\n",
6262                         vdev->irq, rc);
6263                 goto req_irq_failed;
6264         }
6265
6266         rc = vio_enable_interrupts(vdev);
6267         if (rc) {
6268                 dev_err(dev, "Error %d enabling interrupts\n", rc);
6269                 goto req_irq_failed;
6270         }
6271
6272         crq->cur = 0;
6273         spin_lock_init(&crq->lock);
6274
6275         /* process any CRQs that were queued before we enabled interrupts */
6276         tasklet_schedule(&adapter->tasklet);
6277
6278         return retrc;
6279
6280 req_irq_failed:
6281         tasklet_kill(&adapter->tasklet);
6282         do {
6283                 rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
6284         } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
6285 reg_crq_failed:
6286         dma_unmap_single(dev, crq->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL);
6287 map_failed:
6288         free_page((unsigned long)crq->msgs);
6289         crq->msgs = NULL;
6290         return retrc;
6291 }
6292
6293 static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
6294 {
6295         struct device *dev = &adapter->vdev->dev;
6296         unsigned long timeout = msecs_to_jiffies(20000);
6297         u64 old_num_rx_queues = adapter->req_rx_queues;
6298         u64 old_num_tx_queues = adapter->req_tx_queues;
6299         int rc;
6300
6301         adapter->from_passive_init = false;
6302
6303         rc = ibmvnic_send_crq_init(adapter);
6304         if (rc) {
6305                 dev_err(dev, "Send crq init failed with error %d\n", rc);
6306                 return rc;
6307         }
6308
6309         if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
6310                 dev_err(dev, "Initialization sequence timed out\n");
6311                 return -ETIMEDOUT;
6312         }
6313
6314         if (adapter->init_done_rc) {
6315                 release_crq_queue(adapter);
6316                 dev_err(dev, "CRQ-init failed, %d\n", adapter->init_done_rc);
6317                 return adapter->init_done_rc;
6318         }
6319
6320         if (adapter->from_passive_init) {
6321                 adapter->state = VNIC_OPEN;
6322                 adapter->from_passive_init = false;
6323                 dev_err(dev, "CRQ-init failed, passive-init\n");
6324                 return -EINVAL;
6325         }
6326
6327         if (reset &&
6328             test_bit(0, &adapter->resetting) && !adapter->wait_for_reset &&
6329             adapter->reset_reason != VNIC_RESET_MOBILITY) {
6330                 if (adapter->req_rx_queues != old_num_rx_queues ||
6331                     adapter->req_tx_queues != old_num_tx_queues) {
6332                         release_sub_crqs(adapter, 0);
6333                         rc = init_sub_crqs(adapter);
6334                 } else {
6335                         /* no need to reinitialize completely, but we do
6336                          * need to clean up transmits that were in flight
6337                          * when we processed the reset.  Failure to do so
6338                          * will confound the upper layer, usually TCP, by
6339                          * creating the illusion of transmits that are
6340                          * awaiting completion.
6341                          */
6342                         clean_tx_pools(adapter);
6343
6344                         rc = reset_sub_crq_queues(adapter);
6345                 }
6346         } else {
6347                 rc = init_sub_crqs(adapter);
6348         }
6349
6350         if (rc) {
6351                 dev_err(dev, "Initialization of sub crqs failed\n");
6352                 release_crq_queue(adapter);
6353                 return rc;
6354         }
6355
6356         rc = init_sub_crq_irqs(adapter);
6357         if (rc) {
6358                 dev_err(dev, "Failed to initialize sub crq irqs\n");
6359                 release_crq_queue(adapter);
6360         }
6361
6362         return rc;
6363 }
6364
6365 static struct device_attribute dev_attr_failover;
6366
6367 static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
6368 {
6369         struct ibmvnic_adapter *adapter;
6370         struct net_device *netdev;
6371         unsigned char *mac_addr_p;
6372         unsigned long flags;
6373         bool init_success;
6374         int rc;
6375
6376         dev_dbg(&dev->dev, "entering ibmvnic_probe for UA 0x%x\n",
6377                 dev->unit_address);
6378
6379         mac_addr_p = (unsigned char *)vio_get_attribute(dev,
6380                                                         VETH_MAC_ADDR, NULL);
6381         if (!mac_addr_p) {
6382                 dev_err(&dev->dev,
6383                         "(%s:%3.3d) ERROR: Can't find MAC_ADDR attribute\n",
6384                         __FILE__, __LINE__);
6385                 return 0;
6386         }
6387
6388         netdev = alloc_etherdev_mq(sizeof(struct ibmvnic_adapter),
6389                                    IBMVNIC_MAX_QUEUES);
6390         if (!netdev)
6391                 return -ENOMEM;
6392
6393         adapter = netdev_priv(netdev);
6394         adapter->state = VNIC_PROBING;
6395         dev_set_drvdata(&dev->dev, netdev);
6396         adapter->vdev = dev;
6397         adapter->netdev = netdev;
6398         adapter->login_pending = false;
6399         memset(&adapter->map_ids, 0, sizeof(adapter->map_ids));
6400         /* map_ids start at 1, so ensure map_id 0 is always "in-use" */
6401         bitmap_set(adapter->map_ids, 0, 1);
6402
6403         ether_addr_copy(adapter->mac_addr, mac_addr_p);
6404         eth_hw_addr_set(netdev, adapter->mac_addr);
6405         netdev->irq = dev->irq;
6406         netdev->netdev_ops = &ibmvnic_netdev_ops;
6407         netdev->ethtool_ops = &ibmvnic_ethtool_ops;
6408         SET_NETDEV_DEV(netdev, &dev->dev);
6409
6410         INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
6411         INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset,
6412                           __ibmvnic_delayed_reset);
6413         INIT_LIST_HEAD(&adapter->rwi_list);
6414         spin_lock_init(&adapter->rwi_lock);
6415         spin_lock_init(&adapter->state_lock);
6416         mutex_init(&adapter->fw_lock);
6417         init_completion(&adapter->probe_done);
6418         init_completion(&adapter->init_done);
6419         init_completion(&adapter->fw_done);
6420         init_completion(&adapter->reset_done);
6421         init_completion(&adapter->stats_done);
6422         clear_bit(0, &adapter->resetting);
6423         adapter->prev_rx_buf_sz = 0;
6424         adapter->prev_mtu = 0;
6425
6426         init_success = false;
6427         do {
6428                 reinit_init_done(adapter);
6429
6430                 /* clear any failovers we got in the previous pass
6431                  * since we are reinitializing the CRQ
6432                  */
6433                 adapter->failover_pending = false;
6434
6435                 /* If we had already initialized CRQ, we may have one or
6436                  * more resets queued already. Discard those and release
6437                  * the CRQ before initializing the CRQ again.
6438                  */
6439                 release_crq_queue(adapter);
6440
6441                 /* Since we are still in PROBING state, __ibmvnic_reset()
6442                  * will not access the ->rwi_list and since we released CRQ,
6443                  * we won't get _new_ transport events. But there maybe an
6444                  * ongoing ibmvnic_reset() call. So serialize access to
6445                  * rwi_list. If we win the race, ibvmnic_reset() could add
6446                  * a reset after we purged but thats ok - we just may end
6447                  * up with an extra reset (i.e similar to having two or more
6448                  * resets in the queue at once).
6449                  * CHECK.
6450                  */
6451                 spin_lock_irqsave(&adapter->rwi_lock, flags);
6452                 flush_reset_queue(adapter);
6453                 spin_unlock_irqrestore(&adapter->rwi_lock, flags);
6454
6455                 rc = init_crq_queue(adapter);
6456                 if (rc) {
6457                         dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n",
6458                                 rc);
6459                         goto ibmvnic_init_fail;
6460                 }
6461
6462                 rc = ibmvnic_reset_init(adapter, false);
6463         } while (rc == -EAGAIN);
6464
6465         /* We are ignoring the error from ibmvnic_reset_init() assuming that the
6466          * partner is not ready. CRQ is not active. When the partner becomes
6467          * ready, we will do the passive init reset.
6468          */
6469
6470         if (!rc)
6471                 init_success = true;
6472
6473         rc = init_stats_buffers(adapter);
6474         if (rc)
6475                 goto ibmvnic_init_fail;
6476
6477         rc = init_stats_token(adapter);
6478         if (rc)
6479                 goto ibmvnic_stats_fail;
6480
6481         rc = device_create_file(&dev->dev, &dev_attr_failover);
6482         if (rc)
6483                 goto ibmvnic_dev_file_err;
6484
6485         netif_carrier_off(netdev);
6486
6487         if (init_success) {
6488                 adapter->state = VNIC_PROBED;
6489                 netdev->mtu = adapter->req_mtu - ETH_HLEN;
6490                 netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
6491                 netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
6492         } else {
6493                 adapter->state = VNIC_DOWN;
6494         }
6495
6496         adapter->wait_for_reset = false;
6497         adapter->last_reset_time = jiffies;
6498
6499         rc = register_netdev(netdev);
6500         if (rc) {
6501                 dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
6502                 goto ibmvnic_register_fail;
6503         }
6504         dev_info(&dev->dev, "ibmvnic registered\n");
6505
6506         rc = ibmvnic_cpu_notif_add(adapter);
6507         if (rc) {
6508                 netdev_err(netdev, "Registering cpu notifier failed\n");
6509                 goto cpu_notif_add_failed;
6510         }
6511
6512         complete(&adapter->probe_done);
6513
6514         return 0;
6515
6516 cpu_notif_add_failed:
6517         unregister_netdev(netdev);
6518
6519 ibmvnic_register_fail:
6520         device_remove_file(&dev->dev, &dev_attr_failover);
6521
6522 ibmvnic_dev_file_err:
6523         release_stats_token(adapter);
6524
6525 ibmvnic_stats_fail:
6526         release_stats_buffers(adapter);
6527
6528 ibmvnic_init_fail:
6529         release_sub_crqs(adapter, 1);
6530         release_crq_queue(adapter);
6531
6532         /* cleanup worker thread after releasing CRQ so we don't get
6533          * transport events (i.e new work items for the worker thread).
6534          */
6535         adapter->state = VNIC_REMOVING;
6536         complete(&adapter->probe_done);
6537         flush_work(&adapter->ibmvnic_reset);
6538         flush_delayed_work(&adapter->ibmvnic_delayed_reset);
6539
6540         flush_reset_queue(adapter);
6541
6542         mutex_destroy(&adapter->fw_lock);
6543         free_netdev(netdev);
6544
6545         return rc;
6546 }
6547
6548 static void ibmvnic_remove(struct vio_dev *dev)
6549 {
6550         struct net_device *netdev = dev_get_drvdata(&dev->dev);
6551         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
6552         unsigned long flags;
6553
6554         spin_lock_irqsave(&adapter->state_lock, flags);
6555
6556         /* If ibmvnic_reset() is scheduling a reset, wait for it to
6557          * finish. Then, set the state to REMOVING to prevent it from
6558          * scheduling any more work and to have reset functions ignore
6559          * any resets that have already been scheduled. Drop the lock
6560          * after setting state, so __ibmvnic_reset() which is called
6561          * from the flush_work() below, can make progress.
6562          */
6563         spin_lock(&adapter->rwi_lock);
6564         adapter->state = VNIC_REMOVING;
6565         spin_unlock(&adapter->rwi_lock);
6566
6567         spin_unlock_irqrestore(&adapter->state_lock, flags);
6568
6569         ibmvnic_cpu_notif_remove(adapter);
6570
6571         flush_work(&adapter->ibmvnic_reset);
6572         flush_delayed_work(&adapter->ibmvnic_delayed_reset);
6573
6574         rtnl_lock();
6575         unregister_netdevice(netdev);
6576
6577         release_resources(adapter);
6578         release_rx_pools(adapter);
6579         release_tx_pools(adapter);
6580         release_sub_crqs(adapter, 1);
6581         release_crq_queue(adapter);
6582
6583         release_stats_token(adapter);
6584         release_stats_buffers(adapter);
6585
6586         adapter->state = VNIC_REMOVED;
6587
6588         rtnl_unlock();
6589         mutex_destroy(&adapter->fw_lock);
6590         device_remove_file(&dev->dev, &dev_attr_failover);
6591         free_netdev(netdev);
6592         dev_set_drvdata(&dev->dev, NULL);
6593 }
6594
6595 static ssize_t failover_store(struct device *dev, struct device_attribute *attr,
6596                               const char *buf, size_t count)
6597 {
6598         struct net_device *netdev = dev_get_drvdata(dev);
6599         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
6600         unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
6601         __be64 session_token;
6602         long rc;
6603
6604         if (!sysfs_streq(buf, "1"))
6605                 return -EINVAL;
6606
6607         rc = plpar_hcall(H_VIOCTL, retbuf, adapter->vdev->unit_address,
6608                          H_GET_SESSION_TOKEN, 0, 0, 0);
6609         if (rc) {
6610                 netdev_err(netdev, "Couldn't retrieve session token, rc %ld\n",
6611                            rc);
6612                 goto last_resort;
6613         }
6614
6615         session_token = (__be64)retbuf[0];
6616         netdev_dbg(netdev, "Initiating client failover, session id %llx\n",
6617                    be64_to_cpu(session_token));
6618         rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
6619                                 H_SESSION_ERR_DETECTED, session_token, 0, 0);
6620         if (rc) {
6621                 netdev_err(netdev,
6622                            "H_VIOCTL initiated failover failed, rc %ld\n",
6623                            rc);
6624                 goto last_resort;
6625         }
6626
6627         return count;
6628
6629 last_resort:
6630         netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n");
6631         ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
6632
6633         return count;
6634 }
6635 static DEVICE_ATTR_WO(failover);
6636
6637 static unsigned long ibmvnic_get_desired_dma(struct vio_dev *vdev)
6638 {
6639         struct net_device *netdev = dev_get_drvdata(&vdev->dev);
6640         struct ibmvnic_adapter *adapter;
6641         struct iommu_table *tbl;
6642         unsigned long ret = 0;
6643         int i;
6644
6645         tbl = get_iommu_table_base(&vdev->dev);
6646
6647         /* netdev inits at probe time along with the structures we need below*/
6648         if (!netdev)
6649                 return IOMMU_PAGE_ALIGN(IBMVNIC_IO_ENTITLEMENT_DEFAULT, tbl);
6650
6651         adapter = netdev_priv(netdev);
6652
6653         ret += PAGE_SIZE; /* the crq message queue */
6654         ret += IOMMU_PAGE_ALIGN(sizeof(struct ibmvnic_statistics), tbl);
6655
6656         for (i = 0; i < adapter->req_tx_queues + adapter->req_rx_queues; i++)
6657                 ret += 4 * PAGE_SIZE; /* the scrq message queue */
6658
6659         for (i = 0; i < adapter->num_active_rx_pools; i++)
6660                 ret += adapter->rx_pool[i].size *
6661                     IOMMU_PAGE_ALIGN(adapter->rx_pool[i].buff_size, tbl);
6662
6663         return ret;
6664 }
6665
6666 static int ibmvnic_resume(struct device *dev)
6667 {
6668         struct net_device *netdev = dev_get_drvdata(dev);
6669         struct ibmvnic_adapter *adapter = netdev_priv(netdev);
6670
6671         if (adapter->state != VNIC_OPEN)
6672                 return 0;
6673
6674         tasklet_schedule(&adapter->tasklet);
6675
6676         return 0;
6677 }
6678
6679 static const struct vio_device_id ibmvnic_device_table[] = {
6680         {"network", "IBM,vnic"},
6681         {"", "" }
6682 };
6683 MODULE_DEVICE_TABLE(vio, ibmvnic_device_table);
6684
6685 static const struct dev_pm_ops ibmvnic_pm_ops = {
6686         .resume = ibmvnic_resume
6687 };
6688
6689 static struct vio_driver ibmvnic_driver = {
6690         .id_table       = ibmvnic_device_table,
6691         .probe          = ibmvnic_probe,
6692         .remove         = ibmvnic_remove,
6693         .get_desired_dma = ibmvnic_get_desired_dma,
6694         .name           = ibmvnic_driver_name,
6695         .pm             = &ibmvnic_pm_ops,
6696 };
6697
6698 /* module functions */
6699 static int __init ibmvnic_module_init(void)
6700 {
6701         int ret;
6702
6703         ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/ibmvnic:online",
6704                                       ibmvnic_cpu_online,
6705                                       ibmvnic_cpu_down_prep);
6706         if (ret < 0)
6707                 goto out;
6708         ibmvnic_online = ret;
6709         ret = cpuhp_setup_state_multi(CPUHP_IBMVNIC_DEAD, "net/ibmvnic:dead",
6710                                       NULL, ibmvnic_cpu_dead);
6711         if (ret)
6712                 goto err_dead;
6713
6714         ret = vio_register_driver(&ibmvnic_driver);
6715         if (ret)
6716                 goto err_vio_register;
6717
6718         pr_info("%s: %s %s\n", ibmvnic_driver_name, ibmvnic_driver_string,
6719                 IBMVNIC_DRIVER_VERSION);
6720
6721         return 0;
6722 err_vio_register:
6723         cpuhp_remove_multi_state(CPUHP_IBMVNIC_DEAD);
6724 err_dead:
6725         cpuhp_remove_multi_state(ibmvnic_online);
6726 out:
6727         return ret;
6728 }
6729
/* Module exit point: unregister the VIO driver, then remove the two CPU
 * hotplug states set up by ibmvnic_module_init(), in the reverse order of
 * their setup.
 */
static void __exit ibmvnic_module_exit(void)
{
        vio_unregister_driver(&ibmvnic_driver);
        cpuhp_remove_multi_state(CPUHP_IBMVNIC_DEAD);
        cpuhp_remove_multi_state(ibmvnic_online);
}

module_init(ibmvnic_module_init);
module_exit(ibmvnic_module_exit);
This page took 0.452323 seconds and 4 git commands to generate.