Git Repo - J-linux.git/commitdiff
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
author Linus Torvalds <[email protected]>
Thu, 7 Jun 2018 20:04:07 +0000 (13:04 -0700)
committer Linus Torvalds <[email protected]>
Thu, 7 Jun 2018 20:04:07 +0000 (13:04 -0700)
Pull rdma updates from Jason Gunthorpe:
 "This has been a quiet cycle for RDMA, the big bulk is the usual
  smallish driver updates and bug fixes. About four new uAPI-related
  things. Not as many syzkaller patches this time; the bugs it finds
  are getting harder to fix.

  Summary:

   - More work cleaning up the RDMA CM code

   - Usual driver bug fixes and cleanups for qedr, qib, hfi1, hns,
     i40iw, iw_cxgb4, mlx5, rxe

   - Driver-specific resource tracking and reporting via netlink

   - Continued work on namespace support from Parav

   - MPLS support for the verbs flow steering uAPI

   - A few tricky IPoIB fixes improving robustness

   - HFI1 driver support for the '16B' management packet format

   - Some auditing to not print kernel pointers via %llx or similar

   - Mark the entire 'UCM' user-space interface as BROKEN with the
     intent to remove it entirely. The user-space side of this was long
     ago replaced with RDMA-CM, and syzkaller is finding bugs in the
     residual UCM interface that nobody wishes to fix because nobody
     uses it.

   - Purge more bogus BUG_ONs from Leon

   - 'flow counters' verbs uAPI

   - T10 fixups for iser/isert; these are Acked by Martin but are going
     through the RDMA tree due to dependencies"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (138 commits)
  RDMA/mlx5: Update SPDX tags to show proper license
  RDMA/restrack: Change SPDX tag to properly reflect license
  IB/hfi1: Fix comment on default hdr entry size
  IB/hfi1: Rename exp_lock to exp_mutex
  IB/hfi1: Add bypass register defines and replace blind constants
  IB/hfi1: Remove unused variable
  IB/hfi1: Ensure VL index is within bounds
  IB/hfi1: Fix user context tail allocation for DMA_RTAIL
  IB/hns: Use zeroing memory allocator instead of allocator/memset
  infiniband: fix a possible use-after-free bug
  iw_cxgb4: add INFINIBAND_ADDR_TRANS dependency
  IB/isert: use T10-PI check mask definitions from core layer
  IB/iser: use T10-PI check mask definitions from core layer
  RDMA/core: introduce check masks for T10-PI offload
  IB/isert: fix T10-pi check mask setting
  IB/mlx5: Add counters read support
  IB/mlx5: Add flow counters read support
  IB/mlx5: Add flow counters binding support
  IB/mlx5: Add counters create and destroy support
  IB/uverbs: Add support for flow counters
  ...

drivers/infiniband/core/cache.c
drivers/infiniband/core/cm.c
drivers/infiniband/hw/hfi1/pcie.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/qedr/verbs.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
include/linux/mlx5/device.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mm.h

index 3330d97faa1e540c499428489ad5055ae89e5e20,2bdfc4b4a15ce82a1d467d11a3ec395a2fae8f01..71a34bee453d8fa4b3f75748e6589cc8f3763633
@@@ -125,6 -125,16 +125,16 @@@ const char *ib_cache_gid_type_str(enum 
  }
  EXPORT_SYMBOL(ib_cache_gid_type_str);
  
+ /** rdma_is_zero_gid - Check if given GID is zero or not.
+  * @gid:      GID to check
+  * Returns true if given GID is zero, returns false otherwise.
+  */
+ bool rdma_is_zero_gid(const union ib_gid *gid)
+ {
+       return !memcmp(gid, &zgid, sizeof(*gid));
+ }
+ EXPORT_SYMBOL(rdma_is_zero_gid);
  int ib_cache_gid_parse_type_str(const char *buf)
  {
        unsigned int i;
  }
  EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
  
+ static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
+ {
+       return device->cache.ports[port - rdma_start_port(device)].gid;
+ }
  static void del_roce_gid(struct ib_device *device, u8 port_num,
                         struct ib_gid_table *table, int ix)
  {
@@@ -231,7 -246,7 +246,7 @@@ static int add_modify_gid(struct ib_gid
                 * So ignore such behavior for IB link layer and don't
                 * fail the call, but don't add such entry to GID cache.
                 */
-               if (!memcmp(gid, &zgid, sizeof(*gid)))
+               if (rdma_is_zero_gid(gid))
                        return 0;
        }
  
@@@ -264,7 -279,7 +279,7 @@@ static void del_gid(struct ib_device *i
  
        if (rdma_protocol_roce(ib_dev, port))
                del_roce_gid(ib_dev, port, table, ix);
-       memcpy(&table->data_vec[ix].gid, &zgid, sizeof(zgid));
+       memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid));
        memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
        table->data_vec[ix].context = NULL;
  }
@@@ -363,10 -378,10 +378,10 @@@ static int __ib_cache_gid_add(struct ib
         * IB spec version 1.3 section 4.1.1 point (6) and
         * section 12.7.10 and section 12.7.20
         */
-       if (!memcmp(gid, &zgid, sizeof(*gid)))
+       if (rdma_is_zero_gid(gid))
                return -EINVAL;
  
-       table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+       table = rdma_gid_table(ib_dev, port);
  
        mutex_lock(&table->lock);
  
@@@ -433,7 -448,7 +448,7 @@@ _ib_cache_gid_del(struct ib_device *ib_
        int ret = 0;
        int ix;
  
-       table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+       table = rdma_gid_table(ib_dev, port);
  
        mutex_lock(&table->lock);
  
@@@ -472,7 -487,7 +487,7 @@@ int ib_cache_gid_del_all_netdev_gids(st
        int ix;
        bool deleted = false;
  
-       table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+       table = rdma_gid_table(ib_dev, port);
  
        mutex_lock(&table->lock);
  
@@@ -496,13 -511,13 +511,13 @@@ static int __ib_cache_gid_get(struct ib
  {
        struct ib_gid_table *table;
  
-       table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+       table = rdma_gid_table(ib_dev, port);
  
        if (index < 0 || index >= table->sz)
                return -EINVAL;
  
        if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
 -              return -EAGAIN;
 +              return -EINVAL;
  
        memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
        if (attr) {
@@@ -589,7 -604,7 +604,7 @@@ int ib_find_cached_gid_by_port(struct i
        if (!rdma_is_port_valid(ib_dev, port))
                return -ENOENT;
  
-       table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+       table = rdma_gid_table(ib_dev, port);
  
        if (ndev)
                mask |= GID_ATTR_FIND_MASK_NETDEV;
@@@ -647,7 -662,7 +662,7 @@@ static int ib_cache_gid_find_by_filter(
            !rdma_protocol_roce(ib_dev, port))
                return -EPROTONOSUPPORT;
  
-       table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+       table = rdma_gid_table(ib_dev, port);
  
        read_lock_irqsave(&table->rwlock, flags);
        for (i = 0; i < table->sz; i++) {
@@@ -724,8 -739,7 +739,7 @@@ static void cleanup_gid_table_port(stru
  
        mutex_lock(&table->lock);
        for (i = 0; i < table->sz; ++i) {
-               if (memcmp(&table->data_vec[i].gid, &zgid,
-                          sizeof(table->data_vec[i].gid))) {
+               if (!rdma_is_zero_gid(&table->data_vec[i].gid)) {
                        del_gid(ib_dev, port, table, i);
                        deleted = true;
                }
@@@ -747,7 -761,7 +761,7 @@@ void ib_cache_gid_set_default_gid(struc
        unsigned int gid_type;
        unsigned long mask;
  
-       table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+       table = rdma_gid_table(ib_dev, port);
  
        mask = GID_ATTR_FIND_MASK_GID_TYPE |
               GID_ATTR_FIND_MASK_DEFAULT |
        }
  }
  
- static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
-                                    struct ib_gid_table *table)
+ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
+                                     struct ib_gid_table *table)
  {
        unsigned int i;
        unsigned long roce_gid_type_mask;
        roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
        num_default_gids = hweight_long(roce_gid_type_mask);
        for (i = 0; i < num_default_gids && i < table->sz; i++) {
-               struct ib_gid_table_entry *entry =
-                       &table->data_vec[i];
+               struct ib_gid_table_entry *entry = &table->data_vec[i];
  
                entry->props |= GID_TABLE_ENTRY_DEFAULT;
                current_gid = find_next_bit(&roce_gid_type_mask,
                                            current_gid);
                entry->attr.gid_type = current_gid++;
        }
+ }
  
-       return 0;
+ static void gid_table_release_one(struct ib_device *ib_dev)
+ {
+       struct ib_gid_table *table;
+       u8 port;
+       for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+               table = ib_dev->cache.ports[port].gid;
+               release_gid_table(table);
+               ib_dev->cache.ports[port].gid = NULL;
+       }
  }
  
  static int _gid_table_setup_one(struct ib_device *ib_dev)
  {
        u8 port;
        struct ib_gid_table *table;
-       int err = 0;
  
        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
                u8 rdma_port = port + rdma_start_port(ib_dev);
  
-               table =
-                       alloc_gid_table(
+               table = alloc_gid_table(
                                ib_dev->port_immutable[rdma_port].gid_tbl_len);
-               if (!table) {
-                       err = -ENOMEM;
+               if (!table)
                        goto rollback_table_setup;
-               }
  
-               err = gid_table_reserve_default(ib_dev,
-                                               port + rdma_start_port(ib_dev),
-                                               table);
-               if (err)
-                       goto rollback_table_setup;
+               gid_table_reserve_default(ib_dev, rdma_port, table);
                ib_dev->cache.ports[port].gid = table;
        }
        return 0;
  
  rollback_table_setup:
-       for (port = 0; port < ib_dev->phys_port_cnt; port++) {
-               table = ib_dev->cache.ports[port].gid;
-               cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
-                                      table);
-               release_gid_table(table);
-       }
-       return err;
- }
- static void gid_table_release_one(struct ib_device *ib_dev)
- {
-       struct ib_gid_table *table;
-       u8 port;
-       for (port = 0; port < ib_dev->phys_port_cnt; port++) {
-               table = ib_dev->cache.ports[port].gid;
-               release_gid_table(table);
-               ib_dev->cache.ports[port].gid = NULL;
-       }
+       gid_table_release_one(ib_dev);
+       return -ENOMEM;
  }
  
  static void gid_table_cleanup_one(struct ib_device *ib_dev)
@@@ -886,7 -882,7 +882,7 @@@ int ib_get_cached_gid(struct ib_device 
        if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;
  
-       table = device->cache.ports[port_num - rdma_start_port(device)].gid;
+       table = rdma_gid_table(device, port_num);
        read_lock_irqsave(&table->rwlock, flags);
        res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
        read_unlock_irqrestore(&table->rwlock, flags);
@@@ -1104,7 -1100,7 +1100,7 @@@ static int config_non_roce_gid_cache(st
  
        gid_attr.device = device;
        gid_attr.port_num = port;
-       table = device->cache.ports[port - rdma_start_port(device)].gid;
+       table = rdma_gid_table(device, port);
  
        mutex_lock(&table->lock);
        for (i = 0; i < gid_tbl_len; ++i) {
@@@ -1137,7 -1133,7 +1133,7 @@@ static void ib_cache_update(struct ib_d
        if (!rdma_is_port_valid(device, port))
                return;
  
-       table = device->cache.ports[port - rdma_start_port(device)].gid;
+       table = rdma_gid_table(device, port);
  
        tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
        if (!tprops)
                        goto err;
        }
  
 -      pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
 -                           sizeof *pkey_cache->table, GFP_KERNEL);
 +      pkey_cache = kmalloc(struct_size(pkey_cache, table,
 +                                       tprops->pkey_tbl_len),
 +                           GFP_KERNEL);
        if (!pkey_cache)
                goto err;
  
@@@ -1300,13 -1295,3 +1296,3 @@@ void ib_cache_cleanup_one(struct ib_dev
        flush_workqueue(ib_wq);
        gid_table_cleanup_one(device);
  }
- void __init ib_cache_setup(void)
- {
-       roce_gid_mgmt_init();
- }
- void __exit ib_cache_cleanup(void)
- {
-       roce_gid_mgmt_cleanup();
- }
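The cache.c hunks above replace the open-coded memcmp()-against-zgid tests
and the repeated "cache.ports[port - rdma_start_port(dev)].gid" index math
with the rdma_is_zero_gid() and rdma_gid_table() helpers. Below is a minimal
standalone sketch of that pattern with simplified stand-in types; ib_gid,
gid_table() and start_port() are hypothetical, not the kernel's definitions.

/*
 * Helper-extraction pattern from the cache.c diff, userspace sketch.
 * Types are deliberately simplified.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct ib_gid { unsigned char raw[16]; };
struct ib_gid_table { int sz; };
struct port_cache { struct ib_gid_table *gid; };

struct ib_device {
	int phys_port_cnt;
	struct port_cache ports[4];
};

/* IB devices number their ports starting at 1. */
static int start_port(const struct ib_device *dev)
{
	(void)dev;
	return 1;
}

/* One place for the zero-GID test instead of scattered memcmp() calls. */
static bool is_zero_gid(const struct ib_gid *gid)
{
	static const struct ib_gid zgid;	/* all-zero GID */

	return !memcmp(gid, &zgid, sizeof(*gid));
}

/* One place for the port-number-to-table translation. */
static struct ib_gid_table *gid_table(struct ib_device *dev, int port)
{
	return dev->ports[port - start_port(dev)].gid;
}

int main(void)
{
	struct ib_gid_table tbl = { .sz = 8 };
	struct ib_device dev = { .phys_port_cnt = 1, .ports = { { &tbl } } };
	struct ib_gid gid = { { 0 } };

	printf("zero gid: %d, table size: %d\n",
	       is_zero_gid(&gid), gid_table(&dev, 1)->sz);
	return 0;
}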
index 36a4d90a7b47b50c6fbf8aa99452dc5226cda6cb,724f123c037f9a980fb5d1e469589af1a49ccd53..27a7b0a2e27a79c5d3f7bc9160915af35dbcb495
@@@ -452,6 -452,32 +452,32 @@@ static void cm_set_private_data(struct 
        cm_id_priv->private_data_len = private_data_len;
  }
  
+ static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
+                             struct ib_grh *grh, struct cm_av *av)
+ {
+       struct rdma_ah_attr new_ah_attr;
+       int ret;
+       av->port = port;
+       av->pkey_index = wc->pkey_index;
+       /*
+        * av->ah_attr might be initialized based on past wc during incoming
+        * connect request or while sending out connect request. So initialize
+        * a new ah_attr on stack. If initialization fails, old ah_attr is
+        * used for sending any responses. If initialization is successful,
+        * than new ah_attr is used by overwriting old one.
+        */
+       ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
+                                     port->port_num, wc,
+                                     grh, &new_ah_attr);
+       if (ret)
+               return ret;
+       memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
+       return 0;
+ }
  static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
                                   struct ib_grh *grh, struct cm_av *av)
  {
@@@ -509,6 -535,7 +535,7 @@@ static struct cm_port *get_cm_port_from
  static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
                              struct cm_id_private *cm_id_priv)
  {
+       struct rdma_ah_attr new_ah_attr;
        struct cm_device *cm_dev;
        struct cm_port *port;
        int ret;
                return ret;
  
        av->port = port;
+       /*
+        * av->ah_attr might be initialized based on wc or during
+        * request processing time. So initialize a new ah_attr on stack.
+        * If initialization fails, old ah_attr is used for sending any
+        * responses. If initialization is successful, than new ah_attr
+        * is used by overwriting the old one.
+        */
        ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
-                                       &av->ah_attr);
+                                       &new_ah_attr);
        if (ret)
                return ret;
  
        av->timeout = path->packet_life_time + 1;
  
        ret = add_cm_id_to_port_list(cm_id_priv, av, port);
-       return ret;
+       if (ret)
+               return ret;
+       memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
+       return 0;
  }
  
  static int cm_alloc_id(struct cm_id_private *cm_id_priv)
@@@ -1669,7 -1707,9 +1707,9 @@@ static void cm_process_work(struct cm_i
                spin_lock_irq(&cm_id_priv->lock);
                work = cm_dequeue_work(cm_id_priv);
                spin_unlock_irq(&cm_id_priv->lock);
-               BUG_ON(!work);
+               if (!work)
+                       return;
                ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
                                                &work->cm_event);
                cm_free_work(work);
@@@ -3189,12 -3229,6 +3229,6 @@@ static int cm_lap_handler(struct cm_wor
        if (!cm_id_priv)
                return -EINVAL;
  
-       ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
-                                     work->mad_recv_wc->recv_buf.grh,
-                                     &cm_id_priv->av);
-       if (ret)
-               goto deref;
        param = &work->cm_event.param.lap_rcvd;
        memset(&work->path[0], 0, sizeof(work->path[1]));
        cm_path_set_rec_type(work->port->cm_dev->ib_device,
                goto unlock;
        }
  
-       cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
-       cm_id_priv->tid = lap_msg->hdr.tid;
+       ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
+                                work->mad_recv_wc->recv_buf.grh,
+                                &cm_id_priv->av);
+       if (ret)
+               goto unlock;
        cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
                           cm_id_priv);
+       cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
+       cm_id_priv->tid = lap_msg->hdr.tid;
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
@@@ -4298,8 -4338,8 +4338,8 @@@ static void cm_add_one(struct ib_devic
        int count = 0;
        u8 i;
  
 -      cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
 -                       ib_device->phys_port_cnt, GFP_KERNEL);
 +      cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
 +                       GFP_KERNEL);
        if (!cm_dev)
                return;
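The cm.c comments above describe the fix's "prepare on the stack, commit only
on success" approach: a failed ib_init_ah_attr_from_wc()/_from_path() call no
longer clobbers the address handle attributes already in use. A small
standalone sketch of that error-handling pattern; the struct layout and the
failing init function are hypothetical stand-ins, not the CM code itself.

#include <errno.h>
#include <stdio.h>
#include <string.h>

struct ah_attr { int sl; int port; };
struct av { struct ah_attr ah_attr; };

/* Stand-in for an initializer that may fail, like ib_init_ah_attr_from_wc(). */
static int init_ah_attr(struct ah_attr *attr, int port)
{
	if (port <= 0)
		return -EINVAL;
	attr->sl = 0;
	attr->port = port;
	return 0;
}

static int update_av(struct av *av, int port)
{
	struct ah_attr new_attr;
	int ret;

	/*
	 * Build the new attributes in a local variable first. On failure the
	 * existing av->ah_attr stays intact and can still be used to send
	 * responses; only on success is it overwritten.
	 */
	ret = init_ah_attr(&new_attr, port);
	if (ret)
		return ret;

	memcpy(&av->ah_attr, &new_attr, sizeof(new_attr));
	return 0;
}

int main(void)
{
	struct av av = { .ah_attr = { .sl = 3, .port = 1 } };

	update_av(&av, -1);	/* fails; old attributes preserved */
	printf("after failed update: port %d\n", av.ah_attr.port);
	update_av(&av, 2);	/* succeeds; attributes replaced */
	printf("after good update:   port %d\n", av.ah_attr.port);
	return 0;
}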
  
index bf601c7629fb2fe1d9979aa427bd946e30fb8f6c,87bd6b60cb53cf0a9761a96e5f04577eddea7b6d..4d4371bf2c7c8b2ab59e9c435f06a23b4007ad46
  #include "chip_registers.h"
  #include "aspm.h"
  
 -/* link speed vector for Gen3 speed - not in Linux headers */
 -#define GEN1_SPEED_VECTOR 0x1
 -#define GEN2_SPEED_VECTOR 0x2
 -#define GEN3_SPEED_VECTOR 0x3
 -
  /*
   * This file contains PCIe utility routines.
   */
@@@ -178,6 -183,14 +178,14 @@@ int hfi1_pcie_ddinit(struct hfi1_devdat
                return -ENOMEM;
        }
        dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY);
+       /* verify that reads actually work, save revision for reset check */
+       dd->revision = readq(dd->kregbase1 + CCE_REVISION);
+       if (dd->revision == ~(u64)0) {
+               dd_dev_err(dd, "Cannot read chip CSRs\n");
+               goto nomem;
+       }
        dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
        dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count);
        dd->base2_start  = RCV_ARRAY + dd->chip_rcv_array_count * 8;
@@@ -257,7 -270,7 +265,7 @@@ static u32 extract_speed(u16 linkstat
        case PCI_EXP_LNKSTA_CLS_5_0GB:
                speed = 5000; /* Gen 2, 5GHz */
                break;
 -      case GEN3_SPEED_VECTOR:
 +      case PCI_EXP_LNKSTA_CLS_8_0GB:
                speed = 8000; /* Gen 3, 8GHz */
                break;
        }
@@@ -312,7 -325,7 +320,7 @@@ int pcie_speeds(struct hfi1_devdata *dd
                return ret;
        }
  
 -      if ((linkcap & PCI_EXP_LNKCAP_SLS) != GEN3_SPEED_VECTOR) {
 +      if ((linkcap & PCI_EXP_LNKCAP_SLS) != PCI_EXP_LNKCAP_SLS_8_0GB) {
                dd_dev_info(dd,
                            "This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
                            linkcap & PCI_EXP_LNKCAP_SLS);
@@@ -689,6 -702,9 +697,6 @@@ const struct pci_error_handlers hfi1_pc
  /* gasket block secondary bus reset delay */
  #define SBR_DELAY_US 200000   /* 200ms */
  
 -/* mask for PCIe capability register lnkctl2 target link speed */
 -#define LNKCTL2_TARGET_LINK_SPEED_MASK 0xf
 -
  static uint pcie_target = 3;
  module_param(pcie_target, uint, S_IRUGO);
  MODULE_PARM_DESC(pcie_target, "PCIe target speed (0 skip, 1-3 Gen1-3)");
@@@ -1037,13 -1053,13 +1045,13 @@@ int do_pcie_gen3_transition(struct hfi1
                return 0;
  
        if (pcie_target == 1) {                 /* target Gen1 */
 -              target_vector = GEN1_SPEED_VECTOR;
 +              target_vector = PCI_EXP_LNKCTL2_TLS_2_5GT;
                target_speed = 2500;
        } else if (pcie_target == 2) {          /* target Gen2 */
 -              target_vector = GEN2_SPEED_VECTOR;
 +              target_vector = PCI_EXP_LNKCTL2_TLS_5_0GT;
                target_speed = 5000;
        } else if (pcie_target == 3) {          /* target Gen3 */
 -              target_vector = GEN3_SPEED_VECTOR;
 +              target_vector = PCI_EXP_LNKCTL2_TLS_8_0GT;
                target_speed = 8000;
        } else {
                /* off or invalid target - skip */
@@@ -1282,8 -1298,8 +1290,8 @@@ retry
        dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
                    (u32)lnkctl2);
        /* only write to parent if target is not as high as ours */
 -      if ((lnkctl2 & LNKCTL2_TARGET_LINK_SPEED_MASK) < target_vector) {
 -              lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
 +      if ((lnkctl2 & PCI_EXP_LNKCTL2_TLS) < target_vector) {
 +              lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
                lnkctl2 |= target_vector;
                dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
                            (u32)lnkctl2);
  
        dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
                    (u32)lnkctl2);
 -      lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
 +      lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
        lnkctl2 |= target_vector;
        dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
                    (u32)lnkctl2);
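The pcie.c hunks above drop the driver-private GEN*_SPEED_VECTOR and
LNKCTL2_TARGET_LINK_SPEED_MASK macros in favor of the generic
PCI_EXP_LNKCTL2_TLS* definitions. A small userspace sketch of the same field
update, assuming a linux/pci_regs.h recent enough to carry those macros (they
encode the standard PCIe "target link speed" values):

#include <linux/pci_regs.h>
#include <stdint.h>
#include <stdio.h>

/* Raise the 4-bit target link speed field only if it is below 'target'. */
static uint16_t set_target_speed(uint16_t lnkctl2, uint16_t target)
{
	if ((lnkctl2 & PCI_EXP_LNKCTL2_TLS) < target) {
		lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;	/* clear the field */
		lnkctl2 |= target;			/* set the new target */
	}
	return lnkctl2;
}

int main(void)
{
	uint16_t lnkctl2 = PCI_EXP_LNKCTL2_TLS_2_5GT;	/* currently Gen1 */

	lnkctl2 = set_target_speed(lnkctl2, PCI_EXP_LNKCTL2_TLS_8_0GT);
	printf("target link speed field: 0x%x\n",
	       (unsigned)(lnkctl2 & PCI_EXP_LNKCTL2_TLS));
	return 0;
}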
index 69716a7ea9934a414594ffe2b1b86fdcb41a411d,3544150f34699de31bc0c8fb699b07b2d9610418..e52dd21519b45ff00268ae33c21816a8b5a96b53
@@@ -982,13 -982,21 +982,21 @@@ static int mlx5_ib_query_device(struct 
        }
  
        if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
-               resp.cqe_comp_caps.max_num =
-                       MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
-                       MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0;
-               resp.cqe_comp_caps.supported_format =
-                       MLX5_IB_CQE_RES_FORMAT_HASH |
-                       MLX5_IB_CQE_RES_FORMAT_CSUM;
                resp.response_length += sizeof(resp.cqe_comp_caps);
+               if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) {
+                       resp.cqe_comp_caps.max_num =
+                               MLX5_CAP_GEN(dev->mdev,
+                                            cqe_compression_max_num);
+                       resp.cqe_comp_caps.supported_format =
+                               MLX5_IB_CQE_RES_FORMAT_HASH |
+                               MLX5_IB_CQE_RES_FORMAT_CSUM;
+                       if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index))
+                               resp.cqe_comp_caps.supported_format |=
+                                       MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX;
+               }
        }
  
        if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) &&
                if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre))
                        resp.tunnel_offloads_caps |=
                                MLX5_IB_TUNNELED_OFFLOADS_GRE;
+               if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
+                   MLX5_FLEX_PROTO_CW_MPLS_GRE)
+                       resp.tunnel_offloads_caps |=
+                               MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE;
+               if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
+                   MLX5_FLEX_PROTO_CW_MPLS_UDP)
+                       resp.tunnel_offloads_caps |=
+                               MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP;
        }
  
        if (uhw->outlen) {
@@@ -1953,49 -1969,15 +1969,15 @@@ static int mlx5_ib_set_vma_data(struct 
  
  static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
  {
-       int ret;
        struct vm_area_struct *vma;
        struct mlx5_ib_vma_private_data *vma_private, *n;
        struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
-       struct task_struct *owning_process  = NULL;
-       struct mm_struct   *owning_mm       = NULL;
  
-       owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
-       if (!owning_process)
-               return;
-       owning_mm = get_task_mm(owning_process);
-       if (!owning_mm) {
-               pr_info("no mm, disassociate ucontext is pending task termination\n");
-               while (1) {
-                       put_task_struct(owning_process);
-                       usleep_range(1000, 2000);
-                       owning_process = get_pid_task(ibcontext->tgid,
-                                                     PIDTYPE_PID);
-                       if (!owning_process ||
-                           owning_process->state == TASK_DEAD) {
-                               pr_info("disassociate ucontext done, task was terminated\n");
-                               /* in case task was dead need to release the
-                                * task struct.
-                                */
-                               if (owning_process)
-                                       put_task_struct(owning_process);
-                               return;
-                       }
-               }
-       }
-       /* need to protect from a race on closing the vma as part of
-        * mlx5_ib_vma_close.
-        */
-       down_write(&owning_mm->mmap_sem);
        mutex_lock(&context->vma_private_list_mutex);
        list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
                                 list) {
                vma = vma_private->vma;
-               ret = zap_vma_ptes(vma, vma->vm_start,
-                                  PAGE_SIZE);
-               WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__);
+               zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
                /* context going to be destroyed, should
                 * not access ops any more.
                 */
                kfree(vma_private);
        }
        mutex_unlock(&context->vma_private_list_mutex);
-       up_write(&owning_mm->mmap_sem);
-       mmput(owning_mm);
-       put_task_struct(owning_process);
  }
  
  static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
@@@ -2051,10 -2030,6 +2030,6 @@@ static int mlx5_ib_mmap_clock_info_page
        if (err)
                return err;
  
-       mlx5_ib_dbg(dev, "mapped clock info at 0x%lx, PA 0x%llx\n",
-                   vma->vm_start,
-                   (unsigned long long)pfn << PAGE_SHIFT);
        return mlx5_ib_set_vma_data(vma, context);
  }
  
@@@ -2149,15 -2124,14 +2124,14 @@@ static int uar_mmap(struct mlx5_ib_dev 
        err = io_remap_pfn_range(vma, vma->vm_start, pfn,
                                 PAGE_SIZE, vma->vm_page_prot);
        if (err) {
-               mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n",
-                           err, vma->vm_start, &pfn, mmap_cmd2str(cmd));
+               mlx5_ib_err(dev,
+                           "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n",
+                           err, mmap_cmd2str(cmd));
                err = -EAGAIN;
                goto err;
        }
  
        pa = pfn << PAGE_SHIFT;
-       mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
-                   vma->vm_start, &pa);
  
        err = mlx5_ib_set_vma_data(vma, context);
        if (err)
@@@ -2243,10 -2217,6 +2217,6 @@@ static int mlx5_ib_mmap(struct ib_ucont
                if (io_remap_pfn_range(vma, vma->vm_start, pfn,
                                       PAGE_SIZE, vma->vm_page_prot))
                        return -EAGAIN;
-               mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
-                           vma->vm_start,
-                           (unsigned long long)pfn << PAGE_SHIFT);
                break;
        case MLX5_IB_MMAP_CLOCK_INFO:
                return mlx5_ib_mmap_clock_info_page(dev, vma, context);
@@@ -2386,7 -2356,8 +2356,8 @@@ static int mlx5_ib_dealloc_pd(struct ib
  enum {
        MATCH_CRITERIA_ENABLE_OUTER_BIT,
        MATCH_CRITERIA_ENABLE_MISC_BIT,
-       MATCH_CRITERIA_ENABLE_INNER_BIT
+       MATCH_CRITERIA_ENABLE_INNER_BIT,
+       MATCH_CRITERIA_ENABLE_MISC2_BIT
  };
  
  #define HEADER_IS_ZERO(match_criteria, headers)                                  \
@@@ -2406,6 -2377,9 +2377,9 @@@ static u8 get_match_criteria_enable(u3
        match_criteria_enable |=
                (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
                MATCH_CRITERIA_ENABLE_INNER_BIT;
+       match_criteria_enable |=
+               (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
+               MATCH_CRITERIA_ENABLE_MISC2_BIT;
  
        return match_criteria_enable;
  }
@@@ -2440,6 -2414,27 +2414,27 @@@ static void set_tos(void *outer_c, voi
        MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
  }
  
+ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
+ {
+       if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
+           !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
+               return -EOPNOTSUPP;
+       if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
+           !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
+               return -EOPNOTSUPP;
+       if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
+           !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
+               return -EOPNOTSUPP;
+       if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
+           !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
+               return -EOPNOTSUPP;
+       return 0;
+ }
  #define LAST_ETH_FIELD vlan_tag
  #define LAST_IB_FIELD sl
  #define LAST_IPV4_FIELD tos
  #define LAST_TUNNEL_FIELD tunnel_id
  #define LAST_FLOW_TAG_FIELD tag_id
  #define LAST_DROP_FIELD size
+ #define LAST_COUNTERS_FIELD counters
  
  /* Field is the last supported field */
  #define FIELDS_NOT_SUPPORTED(filter, field)\
@@@ -2479,12 -2475,16 +2475,16 @@@ static int parse_flow_flow_action(cons
  static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
                           u32 *match_v, const union ib_flow_spec *ib_spec,
                           const struct ib_flow_attr *flow_attr,
-                          struct mlx5_flow_act *action)
+                          struct mlx5_flow_act *action, u32 prev_type)
  {
        void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
                                           misc_parameters);
        void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
                                           misc_parameters);
+       void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
+                                           misc_parameters_2);
+       void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
+                                           misc_parameters_2);
        void *headers_c;
        void *headers_v;
        int match_ipv;
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
                         ntohs(ib_spec->tcp_udp.val.dst_port));
                break;
+       case IB_FLOW_SPEC_GRE:
+               if (ib_spec->gre.mask.c_ks_res0_ver)
+                       return -EOPNOTSUPP;
+               MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+                        0xff);
+               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+                        IPPROTO_GRE);
+               MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
+                        0xffff);
+               MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
+                        ntohs(ib_spec->gre.val.protocol));
+               memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+                                   gre_key_h),
+                      &ib_spec->gre.mask.key,
+                      sizeof(ib_spec->gre.mask.key));
+               memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
+                                   gre_key_h),
+                      &ib_spec->gre.val.key,
+                      sizeof(ib_spec->gre.val.key));
+               break;
+       case IB_FLOW_SPEC_MPLS:
+               switch (prev_type) {
+               case IB_FLOW_SPEC_UDP:
+                       if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+                                                  ft_field_support.outer_first_mpls_over_udp),
+                                                  &ib_spec->mpls.mask.tag))
+                               return -EOPNOTSUPP;
+                       memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+                                           outer_first_mpls_over_udp),
+                              &ib_spec->mpls.val.tag,
+                              sizeof(ib_spec->mpls.val.tag));
+                       memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+                                           outer_first_mpls_over_udp),
+                              &ib_spec->mpls.mask.tag,
+                              sizeof(ib_spec->mpls.mask.tag));
+                       break;
+               case IB_FLOW_SPEC_GRE:
+                       if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+                                                  ft_field_support.outer_first_mpls_over_gre),
+                                                  &ib_spec->mpls.mask.tag))
+                               return -EOPNOTSUPP;
+                       memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+                                           outer_first_mpls_over_gre),
+                              &ib_spec->mpls.val.tag,
+                              sizeof(ib_spec->mpls.val.tag));
+                       memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+                                           outer_first_mpls_over_gre),
+                              &ib_spec->mpls.mask.tag,
+                              sizeof(ib_spec->mpls.mask.tag));
+                       break;
+               default:
+                       if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+                               if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+                                                          ft_field_support.inner_first_mpls),
+                                                          &ib_spec->mpls.mask.tag))
+                                       return -EOPNOTSUPP;
+                               memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+                                                   inner_first_mpls),
+                                      &ib_spec->mpls.val.tag,
+                                      sizeof(ib_spec->mpls.val.tag));
+                               memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+                                                   inner_first_mpls),
+                                      &ib_spec->mpls.mask.tag,
+                                      sizeof(ib_spec->mpls.mask.tag));
+                       } else {
+                               if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+                                                          ft_field_support.outer_first_mpls),
+                                                          &ib_spec->mpls.mask.tag))
+                                       return -EOPNOTSUPP;
+                               memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+                                                   outer_first_mpls),
+                                      &ib_spec->mpls.val.tag,
+                                      sizeof(ib_spec->mpls.val.tag));
+                               memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+                                                   outer_first_mpls),
+                                      &ib_spec->mpls.mask.tag,
+                                      sizeof(ib_spec->mpls.mask.tag));
+                       }
+               }
+               break;
        case IB_FLOW_SPEC_VXLAN_TUNNEL:
                if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
                                         LAST_TUNNEL_FIELD))
                if (ret)
                        return ret;
                break;
+       case IB_FLOW_SPEC_ACTION_COUNT:
+               if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
+                                        LAST_COUNTERS_FIELD))
+                       return -EOPNOTSUPP;
+               /* for now support only one counters spec per flow */
+               if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+                       return -EINVAL;
+               action->counters = ib_spec->flow_count.counters;
+               action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+               break;
        default:
                return -EINVAL;
        }
@@@ -2867,6 -2966,17 +2966,17 @@@ static void put_flow_table(struct mlx5_
        }
  }
  
+ static void counters_clear_description(struct ib_counters *counters)
+ {
+       struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+       mutex_lock(&mcounters->mcntrs_mutex);
+       kfree(mcounters->counters_data);
+       mcounters->counters_data = NULL;
+       mcounters->cntrs_max_index = 0;
+       mutex_unlock(&mcounters->mcntrs_mutex);
+ }
  static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
  {
        struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
  
        mlx5_del_flow_rules(handler->rule);
        put_flow_table(dev, handler->prio, true);
-       mutex_unlock(&dev->flow_db->lock);
+       if (handler->ibcounters &&
+           atomic_read(&handler->ibcounters->usecnt) == 1)
+               counters_clear_description(handler->ibcounters);
  
+       mutex_unlock(&dev->flow_db->lock);
        kfree(handler);
  
        return 0;
@@@ -3007,21 -3120,143 +3120,143 @@@ static void set_underlay_qp(struct mlx5
        }
  }
  
+ static int read_flow_counters(struct ib_device *ibdev,
+                             struct mlx5_read_counters_attr *read_attr)
+ {
+       struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       return mlx5_fc_query(dev->mdev, fc,
+                            &read_attr->out[IB_COUNTER_PACKETS],
+                            &read_attr->out[IB_COUNTER_BYTES]);
+ }
+ /* flow counters currently expose two counters packets and bytes */
+ #define FLOW_COUNTERS_NUM 2
+ static int counters_set_description(struct ib_counters *counters,
+                                   enum mlx5_ib_counters_type counters_type,
+                                   struct mlx5_ib_flow_counters_desc *desc_data,
+                                   u32 ncounters)
+ {
+       struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+       u32 cntrs_max_index = 0;
+       int i;
+       if (counters_type != MLX5_IB_COUNTERS_FLOW)
+               return -EINVAL;
+       /* init the fields for the object */
+       mcounters->type = counters_type;
+       mcounters->read_counters = read_flow_counters;
+       mcounters->counters_num = FLOW_COUNTERS_NUM;
+       mcounters->ncounters = ncounters;
+       /* each counter entry have both description and index pair */
+       for (i = 0; i < ncounters; i++) {
+               if (desc_data[i].description > IB_COUNTER_BYTES)
+                       return -EINVAL;
+               if (cntrs_max_index <= desc_data[i].index)
+                       cntrs_max_index = desc_data[i].index + 1;
+       }
+       mutex_lock(&mcounters->mcntrs_mutex);
+       mcounters->counters_data = desc_data;
+       mcounters->cntrs_max_index = cntrs_max_index;
+       mutex_unlock(&mcounters->mcntrs_mutex);
+       return 0;
+ }
+ #define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
+ static int flow_counters_set_data(struct ib_counters *ibcounters,
+                                 struct mlx5_ib_create_flow *ucmd)
+ {
+       struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
+       struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
+       struct mlx5_ib_flow_counters_desc *desc_data = NULL;
+       bool hw_hndl = false;
+       int ret = 0;
+       if (ucmd && ucmd->ncounters_data != 0) {
+               cntrs_data = ucmd->data;
+               if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
+                       return -EINVAL;
+               desc_data = kcalloc(cntrs_data->ncounters,
+                                   sizeof(*desc_data),
+                                   GFP_KERNEL);
+               if (!desc_data)
+                       return  -ENOMEM;
+               if (copy_from_user(desc_data,
+                                  u64_to_user_ptr(cntrs_data->counters_data),
+                                  sizeof(*desc_data) * cntrs_data->ncounters)) {
+                       ret = -EFAULT;
+                       goto free;
+               }
+       }
+       if (!mcounters->hw_cntrs_hndl) {
+               mcounters->hw_cntrs_hndl = mlx5_fc_create(
+                       to_mdev(ibcounters->device)->mdev, false);
+               if (!mcounters->hw_cntrs_hndl) {
+                       ret = -ENOMEM;
+                       goto free;
+               }
+               hw_hndl = true;
+       }
+       if (desc_data) {
+               /* counters already bound to at least one flow */
+               if (mcounters->cntrs_max_index) {
+                       ret = -EINVAL;
+                       goto free_hndl;
+               }
+               ret = counters_set_description(ibcounters,
+                                              MLX5_IB_COUNTERS_FLOW,
+                                              desc_data,
+                                              cntrs_data->ncounters);
+               if (ret)
+                       goto free_hndl;
+       } else if (!mcounters->cntrs_max_index) {
+               /* counters not bound yet, must have udata passed */
+               ret = -EINVAL;
+               goto free_hndl;
+       }
+       return 0;
+ free_hndl:
+       if (hw_hndl) {
+               mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
+                               mcounters->hw_cntrs_hndl);
+               mcounters->hw_cntrs_hndl = NULL;
+       }
+ free:
+       kfree(desc_data);
+       return ret;
+ }
  static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
                                                      struct mlx5_ib_flow_prio *ft_prio,
                                                      const struct ib_flow_attr *flow_attr,
                                                      struct mlx5_flow_destination *dst,
-                                                     u32 underlay_qpn)
+                                                     u32 underlay_qpn,
+                                                     struct mlx5_ib_create_flow *ucmd)
  {
        struct mlx5_flow_table  *ft = ft_prio->flow_table;
        struct mlx5_ib_flow_handler *handler;
        struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
        struct mlx5_flow_spec *spec;
-       struct mlx5_flow_destination *rule_dst = dst;
+       struct mlx5_flow_destination dest_arr[2] = {};
+       struct mlx5_flow_destination *rule_dst = dest_arr;
        const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
        unsigned int spec_index;
+       u32 prev_type = 0;
        int err = 0;
-       int dest_num = 1;
+       int dest_num = 0;
        bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
  
        if (!is_valid_attr(dev->mdev, flow_attr))
        }
  
        INIT_LIST_HEAD(&handler->list);
+       if (dst) {
+               memcpy(&dest_arr[0], dst, sizeof(*dst));
+               dest_num++;
+       }
  
        for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
                err = parse_flow_attr(dev->mdev, spec->match_criteria,
                                      spec->match_value,
-                                     ib_flow, flow_attr, &flow_act);
+                                     ib_flow, flow_attr, &flow_act,
+                                     prev_type);
                if (err < 0)
                        goto free;
  
+               prev_type = ((union ib_flow_spec *)ib_flow)->type;
                ib_flow += ((union ib_flow_spec *)ib_flow)->size;
        }
  
                goto free;
        }
  
+       if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+               err = flow_counters_set_data(flow_act.counters, ucmd);
+               if (err)
+                       goto free;
+               handler->ibcounters = flow_act.counters;
+               dest_arr[dest_num].type =
+                       MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+               dest_arr[dest_num].counter =
+                       to_mcounters(flow_act.counters)->hw_cntrs_hndl;
+               dest_num++;
+       }
        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
-               rule_dst = NULL;
-               dest_num = 0;
+               if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) {
+                       rule_dst = NULL;
+                       dest_num = 0;
+               }
        } else {
                if (is_egress)
                        flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
                else
                        flow_act.action |=
-                               dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+                               dest_num ?  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
                                        MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
        }
  
  
        ft_prio->flow_table = ft;
  free:
-       if (err)
+       if (err && handler) {
+               if (handler->ibcounters &&
+                   atomic_read(&handler->ibcounters->usecnt) == 1)
+                       counters_clear_description(handler->ibcounters);
                kfree(handler);
+       }
        kvfree(spec);
        return err ? ERR_PTR(err) : handler;
  }
@@@ -3114,7 -3374,7 +3374,7 @@@ static struct mlx5_ib_flow_handler *cre
                                                     const struct ib_flow_attr *flow_attr,
                                                     struct mlx5_flow_destination *dst)
  {
-       return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0);
+       return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
  }
  
  static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
@@@ -3244,7 -3504,8 +3504,8 @@@ err
  
  static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
                                           struct ib_flow_attr *flow_attr,
-                                          int domain)
+                                          int domain,
+                                          struct ib_udata *udata)
  {
        struct mlx5_ib_dev *dev = to_mdev(qp->device);
        struct mlx5_ib_qp *mqp = to_mqp(qp);
        struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
        struct mlx5_ib_flow_prio *ft_prio;
        bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+       struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
+       size_t min_ucmd_sz, required_ucmd_sz;
        int err;
        int underlay_qpn;
  
+       if (udata && udata->inlen) {
+               min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
+                               sizeof(ucmd_hdr.reserved);
+               if (udata->inlen < min_ucmd_sz)
+                       return ERR_PTR(-EOPNOTSUPP);
+               err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
+               if (err)
+                       return ERR_PTR(err);
+               /* currently supports only one counters data */
+               if (ucmd_hdr.ncounters_data > 1)
+                       return ERR_PTR(-EINVAL);
+               required_ucmd_sz = min_ucmd_sz +
+                       sizeof(struct mlx5_ib_flow_counters_data) *
+                       ucmd_hdr.ncounters_data;
+               if (udata->inlen > required_ucmd_sz &&
+                   !ib_is_udata_cleared(udata, required_ucmd_sz,
+                                        udata->inlen - required_ucmd_sz))
+                       return ERR_PTR(-EOPNOTSUPP);
+               ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
+               if (!ucmd)
+                       return ERR_PTR(-ENOMEM);
+               err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
+               if (err) {
+                       kfree(ucmd);
+                       return ERR_PTR(err);
+               }
+       }
        if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
                return ERR_PTR(-ENOMEM);
  
                        underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ?
                                        mqp->underlay_qpn : 0;
                        handler = _create_flow_rule(dev, ft_prio, flow_attr,
-                                                   dst, underlay_qpn);
+                                                   dst, underlay_qpn, ucmd);
                }
        } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
                   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
  
        mutex_unlock(&dev->flow_db->lock);
        kfree(dst);
+       kfree(ucmd);
  
        return &handler->ibflow;
  
@@@ -3340,6 -3637,7 +3637,7 @@@ destroy_ft
  unlock:
        mutex_unlock(&dev->flow_db->lock);
        kfree(dst);
+       kfree(ucmd);
        kfree(handler);
        return ERR_PTR(err);
  }
@@@ -4756,7 -5054,7 +5054,7 @@@ mlx5_ib_get_vector_affinity(struct ib_d
  {
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
  
 -      return mlx5_get_vector_affinity(dev->mdev, comp_vector);
 +      return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector);
  }
  
  /* The mlx5_ib_multiport_mutex should be held when calling this function */
@@@ -5000,6 -5298,76 +5298,76 @@@ static void depopulate_specs_root(struc
        uverbs_free_spec_tree(dev->ib_dev.specs_root);
  }
  
+ static int mlx5_ib_read_counters(struct ib_counters *counters,
+                                struct ib_counters_read_attr *read_attr,
+                                struct uverbs_attr_bundle *attrs)
+ {
+       struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+       struct mlx5_read_counters_attr mread_attr = {};
+       struct mlx5_ib_flow_counters_desc *desc;
+       int ret, i;
+       mutex_lock(&mcounters->mcntrs_mutex);
+       if (mcounters->cntrs_max_index > read_attr->ncounters) {
+               ret = -EINVAL;
+               goto err_bound;
+       }
+       mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
+                                GFP_KERNEL);
+       if (!mread_attr.out) {
+               ret = -ENOMEM;
+               goto err_bound;
+       }
+       mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
+       mread_attr.flags = read_attr->flags;
+       ret = mcounters->read_counters(counters->device, &mread_attr);
+       if (ret)
+               goto err_read;
+       /* do the pass over the counters data array to assign according to the
+        * descriptions and indexing pairs
+        */
+       desc = mcounters->counters_data;
+       for (i = 0; i < mcounters->ncounters; i++)
+               read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
+ err_read:
+       kfree(mread_attr.out);
+ err_bound:
+       mutex_unlock(&mcounters->mcntrs_mutex);
+       return ret;
+ }
+ static int mlx5_ib_destroy_counters(struct ib_counters *counters)
+ {
+       struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+       counters_clear_description(counters);
+       if (mcounters->hw_cntrs_hndl)
+               mlx5_fc_destroy(to_mdev(counters->device)->mdev,
+                               mcounters->hw_cntrs_hndl);
+       kfree(mcounters);
+       return 0;
+ }
+ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
+                                                  struct uverbs_attr_bundle *attrs)
+ {
+       struct mlx5_ib_mcounters *mcounters;
+       mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL);
+       if (!mcounters)
+               return ERR_PTR(-ENOMEM);
+       mutex_init(&mcounters->mcntrs_mutex);
+       return &mcounters->ibcntrs;
+ }
  void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
  {
        mlx5_ib_cleanup_multiport_master(dev);
@@@ -5243,6 -5611,9 +5611,9 @@@ int mlx5_ib_stage_caps_init(struct mlx5
        dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
        dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
        dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
+       dev->ib_dev.create_counters = mlx5_ib_create_counters;
+       dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters;
+       dev->ib_dev.read_counters = mlx5_ib_read_counters;
  
        err = init_node_data(dev);
        if (err)
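The flow counters code added above splits counter handling in two: the
hardware object exposes a fixed pair of values (packets and bytes, per
FLOW_COUNTERS_NUM), while the user supplies description/index pairs saying
which value goes into which slot of the read buffer, which is what the loop
in mlx5_ib_read_counters() applies. A standalone sketch of that scatter step
with simplified, hypothetical types:

#include <stdint.h>
#include <stdio.h>

enum { COUNTER_PACKETS, COUNTER_BYTES, HW_COUNTERS_NUM };

struct counter_desc {
	uint32_t description;	/* which hardware value: packets or bytes */
	uint32_t index;		/* destination slot in the user buffer */
};

static void scatter_counters(const struct counter_desc *desc, int ndesc,
			     const uint64_t hw[HW_COUNTERS_NUM],
			     uint64_t *user_buf)
{
	int i;

	for (i = 0; i < ndesc; i++)
		user_buf[desc[i].index] += hw[desc[i].description];
}

int main(void)
{
	/* the user asked for bytes in slot 0 and packets in slot 1 */
	struct counter_desc desc[] = {
		{ COUNTER_BYTES,   0 },
		{ COUNTER_PACKETS, 1 },
	};
	uint64_t hw[HW_COUNTERS_NUM] = { 10, 1500 };	/* packets, bytes */
	uint64_t user_buf[2] = { 0, 0 };

	scatter_counters(desc, 2, hw, user_buf);
	printf("slot 0 (bytes) = %llu, slot 1 (packets) = %llu\n",
	       (unsigned long long)user_buf[0],
	       (unsigned long long)user_buf[1]);
	return 0;
}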
index e2caabb8a92666057405068a64666fc2b2a05cce,614a954d0757e9b96583c862b8c06f1e24ea27ed..710032f1fad7ece2714b271808e50de2c5930c29
@@@ -414,7 -414,7 +414,7 @@@ int qedr_mmap(struct ib_ucontext *conte
  
        if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
                DP_ERR(dev,
-                      "failed mmap, adrresses must be page aligned: start=0x%pK, end=0x%pK\n",
+                      "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
                       (void *)vma->vm_start, (void *)vma->vm_end);
                return -EINVAL;
        }
@@@ -2577,7 -2577,7 +2577,7 @@@ static int qedr_set_page(struct ib_mr *
        u32 pbes_in_page;
  
        if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
-               DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages);
+               DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
                return -ENOMEM;
        }
  
@@@ -3276,7 -3276,7 +3276,7 @@@ int qedr_post_recv(struct ib_qp *ibqp, 
                                SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
                                          wr->num_sge);
  
 -                      SET_FIELD(flags, RDMA_RQ_SGE_L_KEY,
 +                      SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
                                  wr->sg_list[i].lkey);
  
                        RQ_SGE_SET(rqe, wr->sg_list[i].addr,
                        /* First one must include the number
                         * of SGE in the list
                         */
 -                      SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
 +                      SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
                        SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
  
                        RQ_SGE_SET(rqe, 0, 0, flags);
index 6cab1dd66d1b7d2932f0d44c22120907fdbc0012,bb9665b7e8e7a8a7cf92fc289d1af69f39d08327..f63dfbcd29fea1efc2237d6dcecdbdd74259e1a0
@@@ -200,7 -200,7 +200,7 @@@ __esw_fdb_set_vport_rule(struct mlx5_es
        spec->match_criteria_enable = match_header;
        flow_act.action =  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        flow_rule =
 -              mlx5_add_flow_rules(esw->fdb_table.fdb, spec,
 +              mlx5_add_flow_rules(esw->fdb_table.legacy.fdb, spec,
                                    &flow_act, &dest, 1);
        if (IS_ERR(flow_rule)) {
                esw_warn(esw->dev,
@@@ -282,7 -282,7 +282,7 @@@ static int esw_create_legacy_fdb_table(
                esw_warn(dev, "Failed to create FDB Table err %d\n", err);
                goto out;
        }
 -      esw->fdb_table.fdb = fdb;
 +      esw->fdb_table.legacy.fdb = fdb;
  
        /* Addresses group : Full match unicast/multicast addresses */
        MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
@@@ -343,9 -343,9 +343,9 @@@ out
                        mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
                        esw->fdb_table.legacy.addr_grp = NULL;
                }
 -              if (!IS_ERR_OR_NULL(esw->fdb_table.fdb)) {
 -                      mlx5_destroy_flow_table(esw->fdb_table.fdb);
 -                      esw->fdb_table.fdb = NULL;
 +              if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.fdb)) {
 +                      mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
 +                      esw->fdb_table.legacy.fdb = NULL;
                }
        }
  
  
  static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
  {
 -      if (!esw->fdb_table.fdb)
 +      if (!esw->fdb_table.legacy.fdb)
                return;
  
        esw_debug(esw->dev, "Destroy FDB Table\n");
        mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
        mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
        mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
 -      mlx5_destroy_flow_table(esw->fdb_table.fdb);
 -      esw->fdb_table.fdb = NULL;
 +      mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
 +      esw->fdb_table.legacy.fdb = NULL;
        esw->fdb_table.legacy.addr_grp = NULL;
        esw->fdb_table.legacy.allmulti_grp = NULL;
        esw->fdb_table.legacy.promisc_grp = NULL;
@@@ -396,7 -396,7 +396,7 @@@ static int esw_add_uc_addr(struct mlx5_
  
  fdb_add:
        /* SRIOV is enabled: Forward UC MAC to vport */
 -      if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY)
 +      if (esw->fdb_table.legacy.fdb && esw->mode == SRIOV_LEGACY)
                vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
  
        esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n",
@@@ -486,7 -486,7 +486,7 @@@ static int esw_add_mc_addr(struct mlx5_
        u8 *mac = vaddr->node.addr;
        u32 vport = vaddr->vport;
  
 -      if (!esw->fdb_table.fdb)
 +      if (!esw->fdb_table.legacy.fdb)
                return 0;
  
        esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr);
@@@ -526,7 -526,7 +526,7 @@@ static int esw_del_mc_addr(struct mlx5_
        u8 *mac = vaddr->node.addr;
        u32 vport = vaddr->vport;
  
 -      if (!esw->fdb_table.fdb)
 +      if (!esw->fdb_table.legacy.fdb)
                return 0;
  
        esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr);
@@@ -2104,21 -2104,18 +2104,18 @@@ static int mlx5_eswitch_query_vport_dro
        struct mlx5_vport *vport = &esw->vports[vport_idx];
        u64 rx_discard_vport_down, tx_discard_vport_down;
        u64 bytes = 0;
-       u16 idx = 0;
        int err = 0;
  
        if (!vport->enabled || esw->mode != SRIOV_LEGACY)
                return 0;
  
-       if (vport->egress.drop_counter) {
-               idx = vport->egress.drop_counter->id;
-               mlx5_fc_query(dev, idx, &stats->rx_dropped, &bytes);
-       }
+       if (vport->egress.drop_counter)
+               mlx5_fc_query(dev, vport->egress.drop_counter,
+                             &stats->rx_dropped, &bytes);
  
-       if (vport->ingress.drop_counter) {
-               idx = vport->ingress.drop_counter->id;
-               mlx5_fc_query(dev, idx, &stats->tx_dropped, &bytes);
-       }
+       if (vport->ingress.drop_counter)
+               mlx5_fc_query(dev, vport->ingress.drop_counter,
+                             &stats->tx_dropped, &bytes);
  
        if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) &&
            !MLX5_CAP_GEN(dev, transmit_discard_vport_down))
@@@ -2175,35 -2172,26 +2172,35 @@@ int mlx5_eswitch_get_vport_stats(struc
        memset(vf_stats, 0, sizeof(*vf_stats));
        vf_stats->rx_packets =
                MLX5_GET_CTR(out, received_eth_unicast.packets) +
 +              MLX5_GET_CTR(out, received_ib_unicast.packets) +
                MLX5_GET_CTR(out, received_eth_multicast.packets) +
 +              MLX5_GET_CTR(out, received_ib_multicast.packets) +
                MLX5_GET_CTR(out, received_eth_broadcast.packets);
  
        vf_stats->rx_bytes =
                MLX5_GET_CTR(out, received_eth_unicast.octets) +
 +              MLX5_GET_CTR(out, received_ib_unicast.octets) +
                MLX5_GET_CTR(out, received_eth_multicast.octets) +
 +              MLX5_GET_CTR(out, received_ib_multicast.octets) +
                MLX5_GET_CTR(out, received_eth_broadcast.octets);
  
        vf_stats->tx_packets =
                MLX5_GET_CTR(out, transmitted_eth_unicast.packets) +
 +              MLX5_GET_CTR(out, transmitted_ib_unicast.packets) +
                MLX5_GET_CTR(out, transmitted_eth_multicast.packets) +
 +              MLX5_GET_CTR(out, transmitted_ib_multicast.packets) +
                MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
  
        vf_stats->tx_bytes =
                MLX5_GET_CTR(out, transmitted_eth_unicast.octets) +
 +              MLX5_GET_CTR(out, transmitted_ib_unicast.octets) +
                MLX5_GET_CTR(out, transmitted_eth_multicast.octets) +
 +              MLX5_GET_CTR(out, transmitted_ib_multicast.octets) +
                MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
  
        vf_stats->multicast =
 -              MLX5_GET_CTR(out, received_eth_multicast.packets);
 +              MLX5_GET_CTR(out, received_eth_multicast.packets) +
 +              MLX5_GET_CTR(out, received_ib_multicast.packets);
  
        vf_stats->broadcast =
                MLX5_GET_CTR(out, received_eth_broadcast.packets);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index e1b609c61d5936ce2fece3df606968adbdf346bc,f246e1f1338f27743710003efb612fc0ec21156b..49a75d31185ecf25ff93c5f3a9beec6b48be28a1
@@@ -187,7 -187,6 +187,7 @@@ static void del_sw_ns(struct fs_node *n
  static void del_sw_hw_rule(struct fs_node *node);
  static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
                                struct mlx5_flow_destination *d2);
 +static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns);
  static struct mlx5_flow_rule *
  find_flow_rule(struct fs_fte *fte,
               struct mlx5_flow_destination *dest);
@@@ -325,7 -324,8 +325,8 @@@ static bool check_valid_mask(u8 match_c
        if (match_criteria_enable & ~(
                (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)   |
                (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) |
-               (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS)))
+               (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) |
+               (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2)))
                return false;
  
        if (!(match_criteria_enable &
                        return false;
        }
  
+       if (!(match_criteria_enable &
+             1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2)) {
+               char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
+                                                 match_criteria, misc_parameters_2);
+               if (fg_type_mask[0] ||
+                   memcmp(fg_type_mask, fg_type_mask + 1,
+                          MLX5_ST_SZ_BYTES(fte_match_set_misc2) - 1))
+                       return false;
+       }
        return check_last_reserved(match_criteria);
  }
  
@@@ -482,8 -493,7 +494,8 @@@ static void del_sw_hw_rule(struct fs_no
  
        if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER  &&
            --fte->dests_size) {
 -              modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
 +              modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
 +                            BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
                fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
                update_fte = true;
                goto out;
@@@ -1191,7 -1201,8 +1203,7 @@@ static struct mlx5_flow_handle *alloc_h
  {
        struct mlx5_flow_handle *handle;
  
 -      handle = kzalloc(sizeof(*handle) + sizeof(handle->rule[0]) *
 -                        num_rules, GFP_KERNEL);
 +      handle = kzalloc(struct_size(handle, rule, num_rules), GFP_KERNEL);
        if (!handle)
                return NULL;
  
@@@ -2362,27 -2373,23 +2374,27 @@@ static int create_anchor_flow_table(str
  
  static int init_root_ns(struct mlx5_flow_steering *steering)
  {
 +      int err;
 +
        steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
        if (!steering->root_ns)
 -              goto cleanup;
 +              return -ENOMEM;
  
 -      if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node))
 -              goto cleanup;
 +      err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node);
 +      if (err)
 +              goto out_err;
  
        set_prio_attrs(steering->root_ns);
 -
 -      if (create_anchor_flow_table(steering))
 -              goto cleanup;
 +      err = create_anchor_flow_table(steering);
 +      if (err)
 +              goto out_err;
  
        return 0;
  
 -cleanup:
 -      mlx5_cleanup_fs(steering->dev);
 -      return -ENOMEM;
 +out_err:
 +      cleanup_root_ns(steering->root_ns);
 +      steering->root_ns = NULL;
 +      return err;
  }
  
  static void clean_tree(struct fs_node *node)
@@@ -2494,7 -2501,7 +2506,7 @@@ static int init_fdb_root_ns(struct mlx5
        if (!steering->fdb_root_ns)
                return -ENOMEM;
  
 -      prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 1);
 +      prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 2);
        if (IS_ERR(prio))
                goto out_err;
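Editorial note: the init_root_ns() hunk above swaps a catch-all cleanup label (which always returned -ENOMEM) for a targeted out_err path that tears down only the root namespace that was actually created and propagates the callee's error code. Below is a minimal userspace sketch of that shape; the struct and helper names are made up for illustration and are not the driver's.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins for the driver objects, for illustration only. */
struct root_ns { int anchor_created; };

static struct root_ns *create_root_ns(void)
{
	return calloc(1, sizeof(struct root_ns));
}

static void cleanup_root_ns(struct root_ns *ns)
{
	free(ns);
}

static int create_anchor_table(struct root_ns *ns)
{
	ns->anchor_created = 1;
	return 0;			/* or a negative errno on failure */
}

static int init_root_ns(struct root_ns **out)
{
	struct root_ns *ns;
	int err;

	ns = create_root_ns();
	if (!ns)
		return -ENOMEM;		/* nothing to unwind yet */

	err = create_anchor_table(ns);
	if (err)
		goto out_err;		/* unwind only what already exists */

	*out = ns;
	return 0;

out_err:
	cleanup_root_ns(ns);
	return err;			/* propagate the real error code */
}

int main(void)
{
	struct root_ns *ns = NULL;

	printf("init_root_ns: %d\n", init_root_ns(&ns));
	cleanup_root_ns(ns);
	return 0;
}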
  
diff --combined include/linux/mlx5/device.h
index 0f006cf8343df0d234408e34d8014fabb318bb0c,5004ddc702e3895d1f6b6a754fbe378a881598be..02f72ebf31a78f221b21eaf74ee9b6debcbe998e
@@@ -314,7 -314,6 +314,7 @@@ enum mlx5_event 
        MLX5_EVENT_TYPE_PORT_CHANGE        = 0x09,
        MLX5_EVENT_TYPE_GPIO_EVENT         = 0x15,
        MLX5_EVENT_TYPE_PORT_MODULE_EVENT  = 0x16,
 +      MLX5_EVENT_TYPE_TEMP_WARN_EVENT    = 0x17,
        MLX5_EVENT_TYPE_REMOTE_CONFIG      = 0x19,
        MLX5_EVENT_TYPE_GENERAL_EVENT      = 0x22,
        MLX5_EVENT_TYPE_PPS_EVENT          = 0x25,
        MLX5_EVENT_TYPE_DCT_DRAINED        = 0x1c,
  
        MLX5_EVENT_TYPE_FPGA_ERROR         = 0x20,
 +      MLX5_EVENT_TYPE_FPGA_QP_ERROR      = 0x21,
  };
  
  enum {
@@@ -628,11 -626,6 +628,11 @@@ struct mlx5_eqe_dct 
        __be32  dctn;
  };
  
 +struct mlx5_eqe_temp_warning {
 +      __be64 sensor_warning_msb;
 +      __be64 sensor_warning_lsb;
 +} __packed;
 +
  union ev_data {
        __be32                          raw[7];
        struct mlx5_eqe_cmd             cmd;
        struct mlx5_eqe_port_module     port_module;
        struct mlx5_eqe_pps             pps;
        struct mlx5_eqe_dct             dct;
 +      struct mlx5_eqe_temp_warning    temp_warning;
  } __packed;
  
  struct mlx5_eqe {
@@@ -1002,6 -994,18 +1002,18 @@@ enum mlx5_wol_mode 
        MLX5_WOL_PHY_ACTIVITY   = 1 << 7,
  };
  
+ enum mlx5_mpls_supported_fields {
+       MLX5_FIELD_SUPPORT_MPLS_LABEL = 1 << 0,
+       MLX5_FIELD_SUPPORT_MPLS_EXP   = 1 << 1,
+       MLX5_FIELD_SUPPORT_MPLS_S_BOS = 1 << 2,
+       MLX5_FIELD_SUPPORT_MPLS_TTL   = 1 << 3
+ };
+ enum mlx5_flex_parser_protos {
+       MLX5_FLEX_PROTO_CW_MPLS_GRE   = 1 << 4,
+       MLX5_FLEX_PROTO_CW_MPLS_UDP   = 1 << 5,
+ };
  /* MLX5 DEV CAPs */
  
  /* TODO: EAT.ME */
@@@ -1160,9 -1164,6 +1172,9 @@@ enum mlx5_qcam_feature_groups 
  #define MLX5_CAP_PCAM_FEATURE(mdev, fld) \
        MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld)
  
 +#define MLX5_CAP_PCAM_REG(mdev, reg) \
 +      MLX5_GET(pcam_reg, (mdev)->caps.pcam, port_access_reg_cap_mask.regs_5000_to_507f.reg)
 +
  #define MLX5_CAP_MCAM_REG(mdev, reg) \
        MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_access_reg_cap_mask.access_regs.reg)
  
diff --combined include/linux/mlx5/mlx5_ifc.h
index 8e0b8865f91e1e9b02ea16718119488d2b95b63a,16876fe1710bd916ba313af532f967a9f297a145..27134c4fcb76eb5140ff4828066e73e11d671cd9
@@@ -60,7 -60,6 +60,7 @@@ enum 
        MLX5_EVENT_TYPE_CODING_COMMAND_INTERFACE_COMPLETION        = 0xa,
        MLX5_EVENT_TYPE_CODING_PAGE_REQUEST                        = 0xb,
        MLX5_EVENT_TYPE_CODING_FPGA_ERROR                          = 0x20,
 +      MLX5_EVENT_TYPE_CODING_FPGA_QP_ERROR                       = 0x21
  };
  
  enum {
@@@ -299,9 -298,15 +299,15 @@@ struct mlx5_ifc_flow_table_fields_suppo
        u8         inner_tcp_dport[0x1];
        u8         inner_tcp_flags[0x1];
        u8         reserved_at_37[0x9];
-       u8         reserved_at_40[0x17];
+       u8         reserved_at_40[0x5];
+       u8         outer_first_mpls_over_udp[0x4];
+       u8         outer_first_mpls_over_gre[0x4];
+       u8         inner_first_mpls[0x4];
+       u8         outer_first_mpls[0x4];
+       u8         reserved_at_55[0x2];
        u8         outer_esp_spi[0x1];
-       u8         reserved_at_58[0x2];
+       u8         reserved_at_58[0x2];
        u8         bth_dst_qp[0x1];
  
        u8         reserved_at_5b[0x25];
@@@ -357,6 -362,22 +363,6 @@@ struct mlx5_ifc_odp_per_transport_servi
        u8         reserved_at_6[0x1a];
  };
  
 -struct mlx5_ifc_ipv4_layout_bits {
 -      u8         reserved_at_0[0x60];
 -
 -      u8         ipv4[0x20];
 -};
 -
 -struct mlx5_ifc_ipv6_layout_bits {
 -      u8         ipv6[16][0x8];
 -};
 -
 -union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
 -      struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
 -      struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
 -      u8         reserved_at_0[0x80];
 -};
 -
  struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
        u8         smac_47_16[0x20];
  
@@@ -435,6 -456,29 +441,29 @@@ struct mlx5_ifc_fte_match_set_misc_bit
        u8         reserved_at_1a0[0x60];
  };
  
+ struct mlx5_ifc_fte_match_mpls_bits {
+       u8         mpls_label[0x14];
+       u8         mpls_exp[0x3];
+       u8         mpls_s_bos[0x1];
+       u8         mpls_ttl[0x8];
+ };
+ struct mlx5_ifc_fte_match_set_misc2_bits {
+       struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls;
+       struct mlx5_ifc_fte_match_mpls_bits inner_first_mpls;
+       struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls_over_gre;
+       struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls_over_udp;
+       u8         reserved_at_80[0x100];
+       u8         metadata_reg_a[0x20];
+       u8         reserved_at_1a0[0x60];
+ };
  struct mlx5_ifc_cmd_pas_bits {
        u8         pa_h[0x20];
  
@@@ -525,9 -569,7 +554,9 @@@ struct mlx5_ifc_flow_table_nic_cap_bit
  };
  
  struct mlx5_ifc_flow_table_eswitch_cap_bits {
 -      u8     reserved_at_0[0x200];
 +      u8      reserved_at_0[0x1c];
 +      u8      fdb_multi_path_to_table[0x1];
 +      u8      reserved_at_1d[0x1e3];
  
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb;
  
@@@ -913,7 -955,7 +942,7 @@@ struct mlx5_ifc_cmd_hca_cap_bits 
        u8         log_max_msg[0x5];
        u8         reserved_at_1c8[0x4];
        u8         max_tc[0x4];
 -      u8         reserved_at_1d0[0x1];
 +      u8         temp_warn_event[0x1];
        u8         dcbx[0x1];
        u8         general_notification_event[0x1];
        u8         reserved_at_1d3[0x2];
  
        u8         reserved_at_500[0x20];
        u8         num_of_uars_per_page[0x20];
-       u8         reserved_at_540[0x40];
  
-       u8         reserved_at_580[0x3d];
+       u8         flex_parser_protocols[0x20];
+       u8         reserved_at_560[0x20];
+       u8         reserved_at_580[0x3c];
+       u8         mini_cqe_resp_stride_index[0x1];
        u8         cqe_128_always[0x1];
        u8         cqe_compression_128[0x1];
        u8         cqe_compression[0x1];
@@@ -1159,7 -1204,9 +1191,9 @@@ struct mlx5_ifc_fte_match_param_bits 
  
        struct mlx5_ifc_fte_match_set_lyr_2_4_bits inner_headers;
  
-       u8         reserved_at_600[0xa00];
+       struct mlx5_ifc_fte_match_set_misc2_bits misc_parameters_2;
+       u8         reserved_at_800[0x800];
  };
  
  enum {
@@@ -4568,6 -4615,7 +4602,7 @@@ enum 
        MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS    = 0x0,
        MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS  = 0x1,
        MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_INNER_HEADERS    = 0x2,
+       MLX5_QUERY_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2 = 0X3,
  };
  
  struct mlx5_ifc_query_flow_group_out_bits {
@@@ -6958,9 -7006,10 +6993,10 @@@ struct mlx5_ifc_create_flow_group_out_b
  };
  
  enum {
-       MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS    = 0x0,
-       MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS  = 0x1,
-       MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS    = 0x2,
+       MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS     = 0x0,
+       MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS   = 0x1,
+       MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS     = 0x2,
+       MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2 = 0x3,
  };
  
  struct mlx5_ifc_create_flow_group_in_bits {
@@@ -8006,17 -8055,6 +8042,17 @@@ struct mlx5_ifc_pcam_enhanced_features_
        u8         ppcnt_statistical_group[0x1];
  };
  
 +struct mlx5_ifc_pcam_regs_5000_to_507f_bits {
 +      u8         port_access_reg_cap_mask_127_to_96[0x20];
 +      u8         port_access_reg_cap_mask_95_to_64[0x20];
 +      u8         port_access_reg_cap_mask_63_to_32[0x20];
 +
 +      u8         port_access_reg_cap_mask_31_to_13[0x13];
 +      u8         pbmc[0x1];
 +      u8         pptb[0x1];
 +      u8         port_access_reg_cap_mask_10_to_0[0xb];
 +};
 +
  struct mlx5_ifc_pcam_reg_bits {
        u8         reserved_at_0[0x8];
        u8         feature_group[0x8];
        u8         reserved_at_20[0x20];
  
        union {
 +              struct mlx5_ifc_pcam_regs_5000_to_507f_bits regs_5000_to_507f;
                u8         reserved_at_0[0x80];
        } port_access_reg_cap_mask;
  
@@@ -8791,41 -8828,6 +8827,41 @@@ struct mlx5_ifc_qpts_reg_bits 
        u8         trust_state[0x3];
  };
  
 +struct mlx5_ifc_pptb_reg_bits {
 +      u8         reserved_at_0[0x2];
 +      u8         mm[0x2];
 +      u8         reserved_at_4[0x4];
 +      u8         local_port[0x8];
 +      u8         reserved_at_10[0x6];
 +      u8         cm[0x1];
 +      u8         um[0x1];
 +      u8         pm[0x8];
 +
 +      u8         prio_x_buff[0x20];
 +
 +      u8         pm_msb[0x8];
 +      u8         reserved_at_48[0x10];
 +      u8         ctrl_buff[0x4];
 +      u8         untagged_buff[0x4];
 +};
 +
 +struct mlx5_ifc_pbmc_reg_bits {
 +      u8         reserved_at_0[0x8];
 +      u8         local_port[0x8];
 +      u8         reserved_at_10[0x10];
 +
 +      u8         xoff_timer_value[0x10];
 +      u8         xoff_refresh[0x10];
 +
 +      u8         reserved_at_40[0x9];
 +      u8         fullness_threshold[0x7];
 +      u8         port_buffer_size[0x10];
 +
 +      struct mlx5_ifc_bufferx_reg_bits buffer[10];
 +
 +      u8         reserved_at_2e0[0x40];
 +};
 +
  struct mlx5_ifc_qtct_reg_bits {
        u8         reserved_at_0[0x8];
        u8         port_number[0x8];
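Editorial note: the new mlx5_ifc_fte_match_mpls_bits layout above mirrors a standard MPLS label stack entry: 20 bits of label, 3 bits of EXP/TC, 1 bottom-of-stack bit and 8 bits of TTL, 32 bits in total. As a sanity check of those widths, here is a small standalone C sketch that packs and unpacks such an entry; this is the generic MPLS wire format, not the driver's match-parameter encoding.

#include <stdint.h>
#include <stdio.h>

/* Pack an MPLS label stack entry: label(20) | exp(3) | bos(1) | ttl(8). */
static uint32_t mpls_lse_pack(uint32_t label, uint8_t exp, uint8_t bos, uint8_t ttl)
{
	return ((label & 0xfffff) << 12) |
	       ((uint32_t)(exp & 0x7) << 9) |
	       ((uint32_t)(bos & 0x1) << 8) |
	       ttl;
}

int main(void)
{
	uint32_t lse = mpls_lse_pack(16, 5, 1, 64);

	printf("lse=0x%08x label=%u exp=%u bos=%u ttl=%u\n",
	       lse, lse >> 12, (lse >> 9) & 0x7, (lse >> 8) & 0x1, lse & 0xff);
	return 0;
}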
diff --combined include/linux/mm.h
index 0495e6f97fae1d5ba3043e08c07824b52fde57ad,edf44265c752ab6042a7743d57fe155766aa0e32..29c5458b4b5e3a2ac64f2f68f85755e99d185bda
@@@ -25,7 -25,6 +25,7 @@@
  #include <linux/err.h>
  #include <linux/page_ref.h>
  #include <linux/memremap.h>
 +#include <linux/overflow.h>
  
  struct mempolicy;
  struct anon_vma;
@@@ -229,21 -228,15 +229,21 @@@ extern unsigned int kobjsize(const voi
  #define VM_HIGH_ARCH_4        BIT(VM_HIGH_ARCH_BIT_4)
  #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
  
 -#if defined(CONFIG_X86)
 -# define VM_PAT               VM_ARCH_1       /* PAT reserves whole VMA at once (x86) */
 -#if defined (CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS)
 +#ifdef CONFIG_ARCH_HAS_PKEYS
  # define VM_PKEY_SHIFT        VM_HIGH_ARCH_BIT_0
  # define VM_PKEY_BIT0 VM_HIGH_ARCH_0  /* A protection key is a 4-bit value */
 -# define VM_PKEY_BIT1 VM_HIGH_ARCH_1
 +# define VM_PKEY_BIT1 VM_HIGH_ARCH_1  /* on x86 and 5-bit value on ppc64   */
  # define VM_PKEY_BIT2 VM_HIGH_ARCH_2
  # define VM_PKEY_BIT3 VM_HIGH_ARCH_3
 +#ifdef CONFIG_PPC
 +# define VM_PKEY_BIT4  VM_HIGH_ARCH_4
 +#else
 +# define VM_PKEY_BIT4  0
  #endif
 +#endif /* CONFIG_ARCH_HAS_PKEYS */
 +
 +#if defined(CONFIG_X86)
 +# define VM_PAT               VM_ARCH_1       /* PAT reserves whole VMA at once (x86) */
  #elif defined(CONFIG_PPC)
  # define VM_SAO               VM_ARCH_1       /* Strong Access Ordering (powerpc) */
  #elif defined(CONFIG_PARISC)
@@@ -386,7 -379,7 +386,7 @@@ enum page_entry_size 
  /*
   * These are the virtual MM functions - opening of an area, closing and
   * unmapping it (needed to keep files on disk up-to-date etc), pointer
-  * to the functions called when a no-page or a wp-page exception occurs. 
+  * to the functions called when a no-page or a wp-page exception occurs.
   */
  struct vm_operations_struct {
        void (*open)(struct vm_area_struct * area);
@@@ -567,12 -560,10 +567,12 @@@ static inline void *kvzalloc(size_t siz
  
  static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
  {
 -      if (size != 0 && n > SIZE_MAX / size)
 +      size_t bytes;
 +
 +      if (unlikely(check_mul_overflow(n, size, &bytes)))
                return NULL;
  
 -      return kvmalloc(n * size, flags);
 +      return kvmalloc(bytes, flags);
  }
  
  extern void kvfree(const void *addr);
@@@ -1276,10 -1267,10 +1276,10 @@@ struct page *_vm_normal_page(struct vm_
  struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
                                pmd_t pmd);
  
- int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
-               unsigned long size);
+ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
+                 unsigned long size);
  void zap_page_range(struct vm_area_struct *vma, unsigned long address,
-               unsigned long size);
+                   unsigned long size);
  void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
                unsigned long start, unsigned long end);
  
@@@ -2118,6 -2109,7 +2118,6 @@@ extern void setup_per_cpu_pageset(void)
  
  extern void zone_pcp_update(struct zone *zone);
  extern void zone_pcp_reset(struct zone *zone);
 -extern void setup_zone_pageset(struct zone *zone);
  
  /* page_alloc.c */
  extern int min_free_kbytes;
@@@ -2474,13 -2466,6 +2474,13 @@@ static inline vm_fault_t vmf_insert_pfn
        return VM_FAULT_NOPAGE;
  }
  
 +static inline vm_fault_t vmf_error(int err)
 +{
 +      if (err == -ENOMEM)
 +              return VM_FAULT_OOM;
 +      return VM_FAULT_SIGBUS;
 +}
 +
  struct page *follow_page_mask(struct vm_area_struct *vma,
                              unsigned long address, unsigned int foll_flags,
                              unsigned int *page_mask);
@@@ -2508,7 -2493,6 +2508,7 @@@ static inline struct page *follow_page(
  #define FOLL_MLOCK    0x1000  /* lock present pages */
  #define FOLL_REMOTE   0x2000  /* we are working on non-current tsk/mm */
  #define FOLL_COW      0x4000  /* internal GUP flag */
 +#define FOLL_ANON     0x8000  /* don't do file mappings */
  
  static inline int vm_fault_to_errno(int vm_fault, int foll_flags)
  {