static int dpaa2_eth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
- struct dpaa2_eth_priv *priv = netdev_priv(dev);
-
switch (xdp->command) {
case XDP_SETUP_PROG:
return setup_xdp(dev, xdp->prog);
- case XDP_QUERY_PROG:
- xdp->prog_id = priv->xdp_prog ? priv->xdp_prog->aux->id : 0;
- break;
default:
return -EINVAL;
}
/* Update NAPI statistics */
ch->stats.cdan++;
- napi_schedule_irqoff(&ch->napi);
+ napi_schedule(&ch->napi);
}
/* Allocate and configure a DPCON object */
~IFF_PROMISC;
goto out_promisc;
}
+ ice_cfg_vlan_pruning(vsi, false, false);
}
} else {
/* Clear Rx filter to remove traffic from wire */
IFF_PROMISC;
goto out_promisc;
}
+ if (vsi->num_vlan > 1)
+ ice_cfg_vlan_pruning(vsi, true, false);
}
}
}
}
}
+/**
+ * ice_set_dflt_mib - send a default config MIB to the FW
+ * @pf: private PF struct
+ *
+ * This function sends a default configuration MIB to the FW.
+ *
+ * If this function errors out at any point, the driver is still able to
+ * function. The main impact is that LFC may not operate as expected.
+ * Therefore an error in this function should be logged with a debug
+ * message only, and the driver rebuild/re-enable should continue.
+ */
+static void ice_set_dflt_mib(struct ice_pf *pf)
+{
+ struct device *dev;
+ u8 mib_type, *buf, *lldpmib = NULL;
+ u16 len, typelen, offset = 0;
+ struct ice_lldp_org_tlv *tlv;
+ struct ice_hw *hw;
+ u32 ouisubtype;
+
+ if (!pf) {
+ pr_debug("%s NULL pf pointer\n", __func__);
+ return;
+ }
+
+ dev = ice_pf_to_dev(pf);
+ hw = &pf->hw;
+ mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
+ lldpmib = kzalloc(ICE_LLDPDU_SIZE, GFP_KERNEL);
+ if (!lldpmib) {
+ dev_dbg(dev, "%s Failed to allocate MIB memory\n",
+ __func__);
+ return;
+ }
+
+ /* Add ETS CFG TLV */
+ tlv = (struct ice_lldp_org_tlv *)lldpmib;
+ typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+ ICE_IEEE_ETS_TLV_LEN);
+ tlv->typelen = htons(typelen);
+ ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+ ICE_IEEE_SUBTYPE_ETS_CFG);
+ tlv->ouisubtype = htonl(ouisubtype);
+
+ buf = tlv->tlvinfo;
+ buf[0] = 0;
+
+ /* ETS CFG all UPs map to TC 0. Next 4 (1 - 4) Octets = 0.
+ * Octets 5 - 12 are BW values, set octet 5 to 100% BW.
+ * Octets 13 - 20 are TSA values - leave as zeros
+ */
+ buf[5] = 0x64;
+ len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
+ offset += len + 2;
+ tlv = (struct ice_lldp_org_tlv *)
+ ((char *)tlv + sizeof(tlv->typelen) + len);
+
+ /* Add ETS REC TLV */
+ buf = tlv->tlvinfo;
+ tlv->typelen = htons(typelen);
+
+ ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+ ICE_IEEE_SUBTYPE_ETS_REC);
+ tlv->ouisubtype = htonl(ouisubtype);
+
+ /* First octet of buf is reserved
+ * Octets 1 - 4 map UP to TC - all UPs map to zero
+ * Octets 5 - 12 are BW values - set TC 0 to 100%.
+ * Octets 13 - 20 are TSA values - leave as zeros
+ */
+ buf[5] = 0x64;
+ offset += len + 2;
+ tlv = (struct ice_lldp_org_tlv *)
+ ((char *)tlv + sizeof(tlv->typelen) + len);
+
+ /* Add PFC CFG TLV */
+ typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+ ICE_IEEE_PFC_TLV_LEN);
+ tlv->typelen = htons(typelen);
+
+ ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+ ICE_IEEE_SUBTYPE_PFC_CFG);
+ tlv->ouisubtype = htonl(ouisubtype);
+
+ buf = tlv->tlvinfo;
+
+ /* Octet 0 advertises a PFC capability of 8 traffic classes;
+ * octet 1 is left as all zeros - PFC disabled
+ */
+ buf[0] = 0x08;
+ len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
+ offset += len + 2;
+
+ if (ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, offset, NULL))
+ dev_dbg(dev, "%s Failed to set default LLDP MIB\n", __func__);
+
+ kfree(lldpmib);
+}
+
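/* Worked example (editorial) of the offset arithmetic above, assuming the
 * usual IEEE TLV information lengths of 25 octets (ETS) and 6 octets (PFC)
 * carried by the ICE_IEEE_*_TLV_LEN macros:
 *
 *	ETS CFG: 2 (typelen header) + 25 = 27 bytes, offset = 27
 *	ETS REC: 2 + 25 = 27 bytes,                  offset = 54
 *	PFC CFG: 2 + 6  =  8 bytes,                  offset = 62
 *
 * so 62 bytes of the ICE_LLDPDU_SIZE buffer are handed to
 * ice_aq_set_lldp_mib().
 */
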
/**
* ice_link_event - process the link event
* @pf: PF that the link event is associated with
dev_dbg(dev, "Failed to update link status and re-enable link events for port %d\n",
pi->lport);
+ /* Check if the link state is up after updating link info, and treat
+ * this event as an UP event since the link is actually UP now.
+ */
+ if (phy_info->link_info.link_info & ICE_AQ_LINK_UP)
+ link_up = true;
+
vsi = ice_get_main_vsi(pf);
if (!vsi || !vsi->port_info)
return -EINVAL;
if (link_up == old_link && link_speed == old_link_speed)
return result;
- ice_dcb_rebuild(pf);
+ if (ice_is_dcb_active(pf)) {
+ if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+ ice_dcb_rebuild(pf);
+ } else {
+ if (link_up)
+ ice_set_dflt_mib(pf);
+ }
ice_vsi_link_event(vsi, link_up);
ice_print_link_msg(vsi, link_up);
return status;
}
+enum ice_aq_task_state {
+ ICE_AQ_TASK_WAITING = 0,
+ ICE_AQ_TASK_COMPLETE,
+ ICE_AQ_TASK_CANCELED,
+};
+
+struct ice_aq_task {
+ struct hlist_node entry;
+
+ u16 opcode;
+ struct ice_rq_event_info *event;
+ enum ice_aq_task_state state;
+};
+
+/**
+ * ice_aq_wait_for_event - Wait for an AdminQ event from firmware
+ * @pf: pointer to the PF private structure
+ * @opcode: the opcode to wait for
+ * @timeout: how long to wait, in jiffies
+ * @event: storage for the event info
+ *
+ * Waits for a specific AdminQ completion event on the ARQ for a given PF. The
+ * current thread will be put to sleep until the specified event occurs or
+ * until the given timeout is reached.
+ *
+ * To obtain only the descriptor contents, pass an event without an allocated
+ * msg_buf. If the complete data buffer is desired, allocate the
+ * event->msg_buf with enough space ahead of time.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
+ struct ice_rq_event_info *event)
+{
+ struct ice_aq_task *task;
+ long ret;
+ int err;
+
+ task = kzalloc(sizeof(*task), GFP_KERNEL);
+ if (!task)
+ return -ENOMEM;
+
+ INIT_HLIST_NODE(&task->entry);
+ task->opcode = opcode;
+ task->event = event;
+ task->state = ICE_AQ_TASK_WAITING;
+
+ spin_lock_bh(&pf->aq_wait_lock);
+ hlist_add_head(&task->entry, &pf->aq_wait_list);
+ spin_unlock_bh(&pf->aq_wait_lock);
+
+ ret = wait_event_interruptible_timeout(pf->aq_wait_queue, task->state,
+ timeout);
+ switch (task->state) {
+ case ICE_AQ_TASK_WAITING:
+ err = ret < 0 ? ret : -ETIMEDOUT;
+ break;
+ case ICE_AQ_TASK_CANCELED:
+ err = ret < 0 ? ret : -ECANCELED;
+ break;
+ case ICE_AQ_TASK_COMPLETE:
+ err = ret < 0 ? ret : 0;
+ break;
+ default:
+ WARN(1, "Unexpected AdminQ wait task state %u", task->state);
+ err = -EINVAL;
+ break;
+ }
+
+ spin_lock_bh(&pf->aq_wait_lock);
+ hlist_del(&task->entry);
+ spin_unlock_bh(&pf->aq_wait_lock);
+ kfree(task);
+
+ return err;
+}
+
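/* Usage sketch (editorial, not part of this patch): wait up to one second
 * for a completion that firmware reports on the ARQ. The opcode below is
 * only an example; callers pass whatever opcode they issued. Since
 * event.msg_buf is left NULL, only the descriptor and msg_len are captured.
 *
 *	struct ice_rq_event_info event = {};
 *	int err;
 *
 *	err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write, HZ, &event);
 *	if (err)
 *		dev_dbg(ice_pf_to_dev(pf), "no completion: %d\n", err);
 */
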
+/**
+ * ice_aq_check_events - Check if any thread is waiting for an AdminQ event
+ * @pf: pointer to the PF private structure
+ * @opcode: the opcode of the event
+ * @event: the event to check
+ *
+ * Loops over the current list of pending threads waiting for an AdminQ event.
+ * For each matching task, copy the contents of the event into the task
+ * structure and wake up the thread.
+ *
+ * If multiple threads wait for the same opcode, they will all be woken up.
+ *
+ * Note that event->msg_buf will only be duplicated if the event has a buffer
+ * with enough space already allocated. Otherwise, only the descriptor and
+ * message length will be copied.
+ */
+static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
+ struct ice_rq_event_info *event)
+{
+ struct ice_aq_task *task;
+ bool found = false;
+
+ spin_lock_bh(&pf->aq_wait_lock);
+ hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
+ if (task->state || task->opcode != opcode)
+ continue;
+
+ memcpy(&task->event->desc, &event->desc, sizeof(event->desc));
+ task->event->msg_len = event->msg_len;
+
+ /* Only copy the data buffer if a destination was set */
+ if (task->event->msg_buf &&
+ task->event->buf_len > event->buf_len) {
+ memcpy(task->event->msg_buf, event->msg_buf,
+ event->buf_len);
+ task->event->buf_len = event->buf_len;
+ }
+
+ task->state = ICE_AQ_TASK_COMPLETE;
+ found = true;
+ }
+ spin_unlock_bh(&pf->aq_wait_lock);
+
+ if (found)
+ wake_up(&pf->aq_wait_queue);
+}
+
+/**
+ * ice_aq_cancel_waiting_tasks - Immediately cancel all waiting tasks
+ * @pf: the PF private structure
+ *
+ * Set all waiting tasks to ICE_AQ_TASK_CANCELED, and wake up their threads.
+ * This will then cause ice_aq_wait_for_event to exit with -ECANCELED.
+ */
+static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf)
+{
+ struct ice_aq_task *task;
+
+ spin_lock_bh(&pf->aq_wait_lock);
+ hlist_for_each_entry(task, &pf->aq_wait_list, entry)
+ task->state = ICE_AQ_TASK_CANCELED;
+ spin_unlock_bh(&pf->aq_wait_lock);
+
+ wake_up(&pf->aq_wait_queue);
+}
+
/**
* __ice_clean_ctrlq - helper function to clean controlq rings
* @pf: ptr to struct ice_pf
opcode = le16_to_cpu(event.desc.opcode);
+ /* Notify any thread that might be waiting for this event */
+ ice_aq_check_events(pf, opcode, &event);
+
switch (opcode) {
case ice_aqc_opc_get_link_status:
if (ice_handle_link_event(pf, &event))
switch (xdp->command) {
case XDP_SETUP_PROG:
return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
- case XDP_QUERY_PROG:
- xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
- return 0;
case XDP_SETUP_XSK_UMEM:
return ice_xsk_umem_setup(vsi, xdp->xsk.umem,
xdp->xsk.queue_id);
mutex_init(&pf->sw_mutex);
mutex_init(&pf->tc_mutex);
+ INIT_HLIST_HEAD(&pf->aq_wait_list);
+ spin_lock_init(&pf->aq_wait_lock);
+ init_waitqueue_head(&pf->aq_wait_queue);
+
/* setup service timer and periodic service task */
timer_setup(&pf->serv_tmr, ice_service_timer, 0);
pf->serv_tmr_period = HZ;
return err;
}
+/**
+ * ice_set_safe_mode_vlan_cfg - configure PF VSI to allow all VLANs in safe mode
+ * @pf: PF to configure
+ *
+ * No VLAN offloads/filtering are advertised in safe mode so make sure the PF
+ * VSI can still Tx/Rx VLAN tagged packets.
+ */
+static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf)
+{
+ struct ice_vsi *vsi = ice_get_main_vsi(pf);
+ struct ice_vsi_ctx *ctxt;
+ enum ice_status status;
+ struct ice_hw *hw;
+
+ if (!vsi)
+ return;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ return;
+
+ hw = &pf->hw;
+ ctxt->info = vsi->info;
+
+ ctxt->info.valid_sections =
+ cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
+ ICE_AQ_VSI_PROP_SECURITY_VALID |
+ ICE_AQ_VSI_PROP_SW_VALID);
+
+ /* disable VLAN anti-spoof */
+ ctxt->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+ ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+
+ /* disable VLAN pruning and keep all other settings */
+ ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+
+ /* allow all VLANs on Tx and don't strip on Rx */
+ ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL |
+ ICE_AQ_VSI_VLAN_EMOD_NOTHING;
+
+ status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+ if (status) {
+ dev_err(ice_pf_to_dev(vsi->back), "Failed to update VSI for safe mode VLANs, err %s aq_err %s\n",
+ ice_stat_str(status),
+ ice_aq_str(hw->adminq.sq_last_status));
+ } else {
+ vsi->info.sec_flags = ctxt->info.sec_flags;
+ vsi->info.sw_flags2 = ctxt->info.sw_flags2;
+ vsi->info.vlan_flags = ctxt->info.vlan_flags;
+ }
+
+ kfree(ctxt);
+}
+
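/* Editorial note: the three valid_sections bits set above must match the
 * fields modified below - VLAN_VALID for vlan_flags, SECURITY_VALID for
 * sec_flags and SW_VALID for sw_flags2; firmware only applies the context
 * sections that are flagged as valid in the update.
 */
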
/**
* ice_log_pkg_init - log result of DDP package load
* @hw: pointer to hardware info
if (err) {
dev_err(dev, "probe failed sending driver version %s. error: %d\n",
UTS_RELEASE, err);
- goto err_alloc_sw_unroll;
+ goto err_send_version_unroll;
}
/* since everything is good, start the service timer */
err = ice_init_link_events(pf->hw.port_info);
if (err) {
dev_err(dev, "ice_init_link_events failed: %d\n", err);
- goto err_alloc_sw_unroll;
+ goto err_send_version_unroll;
}
err = ice_init_nvm_phy_type(pf->hw.port_info);
if (err) {
dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err);
- goto err_alloc_sw_unroll;
+ goto err_send_version_unroll;
}
err = ice_update_link_info(pf->hw.port_info);
if (err) {
dev_err(dev, "ice_update_link_info failed: %d\n", err);
- goto err_alloc_sw_unroll;
+ goto err_send_version_unroll;
}
ice_init_link_dflt_override(pf->hw.port_info);
err = ice_init_phy_user_cfg(pf->hw.port_info);
if (err) {
dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err);
- goto err_alloc_sw_unroll;
+ goto err_send_version_unroll;
}
if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) {
/* Disable WoL at init, wait for user to enable */
device_set_wakeup_enable(dev, false);
- /* If no DDP driven features have to be setup, we are done with probe */
- if (ice_is_safe_mode(pf))
+ if (ice_is_safe_mode(pf)) {
+ ice_set_safe_mode_vlan_cfg(pf);
goto probe_done;
+ }
/* initialize DDP driven features */
clear_bit(__ICE_DOWN, pf->state);
return 0;
+err_send_version_unroll:
+ ice_vsi_release_all(pf);
err_alloc_sw_unroll:
ice_devlink_destroy_port(pf);
set_bit(__ICE_SERVICE_DIS, pf->state);
set_bit(__ICE_DOWN, pf->state);
ice_service_task_stop(pf);
+ ice_aq_cancel_waiting_tasks(pf);
+
mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
if (!ice_is_safe_mode(pf))
ice_remove_arfs(pf);
* Power Management callback to quiesce the device and prepare
* for D3 transition.
*/
-static int ice_suspend(struct device *dev)
+static int __maybe_unused ice_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct ice_pf *pf;
* ice_resume - PM callback for waking up from D3
* @dev: generic device information structure
*/
-static int ice_resume(struct device *dev)
+static int __maybe_unused ice_resume(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
enum ice_reset_req reset_type;
return;
}
+ ice_restore_all_vfs_msi_state(pdev);
+
ice_do_reset(pf, ICE_RESET_PFR);
ice_service_task_restart(pf);
mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
vsi->tx_linearize = 0;
vsi->rx_buf_failed = 0;
vsi->rx_page_failed = 0;
+ vsi->rx_gro_dropped = 0;
rcu_read_lock();
vsi_stats->rx_bytes += bytes;
vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed;
vsi->rx_page_failed += ring->rx_stats.alloc_page_failed;
+ vsi->rx_gro_dropped += ring->rx_stats.gro_dropped;
}
/* update XDP Tx rings counters */
ice_update_eth_stats(vsi);
cur_ns->tx_errors = cur_es->tx_errors;
- cur_ns->rx_dropped = cur_es->rx_discards;
+ cur_ns->rx_dropped = cur_es->rx_discards + vsi->rx_gro_dropped;
cur_ns->tx_dropped = cur_es->tx_discards;
cur_ns->multicast = cur_es->rx_multicast;
if (err)
goto err_sched_init_port;
- err = ice_update_link_info(hw->port_info);
- if (err)
- dev_err(dev, "Get link status error %d\n", err);
-
/* start misc vector */
err = ice_req_irq_msix_misc(pf);
if (err) {
return 0;
}
-#ifdef CONFIG_PM
-static int ixgbe_resume(struct pci_dev *pdev)
+static int __maybe_unused ixgbe_resume(struct device *dev_d)
{
+ struct pci_dev *pdev = to_pci_dev(dev_d);
struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
struct net_device *netdev = adapter->netdev;
u32 err;
adapter->hw.hw_addr = adapter->io_addr;
- pci_set_power_state(pdev, PCI_D0);
- pci_restore_state(pdev);
- /*
- * pci_restore_state clears dev->state_saved so call
- * pci_save_state to restore it.
- */
- pci_save_state(pdev);
- err = pci_enable_device_mem(pdev);
- if (err) {
- e_dev_err("Cannot enable PCI device from suspend\n");
- return err;
- }
smp_mb__before_atomic();
clear_bit(__IXGBE_DISABLED, &adapter->state);
pci_set_master(pdev);
- pci_wake_from_d3(pdev, false);
+ device_wakeup_disable(dev_d);
ixgbe_reset(adapter);
return err;
}
-#endif /* CONFIG_PM */
static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake)
{
struct ixgbe_hw *hw = &adapter->hw;
u32 ctrl;
u32 wufc = adapter->wol;
-#ifdef CONFIG_PM
- int retval = 0;
-#endif
rtnl_lock();
netif_device_detach(netdev);
ixgbe_clear_interrupt_scheme(adapter);
rtnl_unlock();
-#ifdef CONFIG_PM
- retval = pci_save_state(pdev);
- if (retval)
- return retval;
-
-#endif
if (hw->mac.ops.stop_link_on_d3)
hw->mac.ops.stop_link_on_d3(hw);
return 0;
}
-#ifdef CONFIG_PM
-static int ixgbe_suspend(struct pci_dev *pdev, pm_message_t state)
+static int __maybe_unused ixgbe_suspend(struct device *dev_d)
{
+ struct pci_dev *pdev = to_pci_dev(dev_d);
int retval;
bool wake;
retval = __ixgbe_shutdown(pdev, &wake);
- if (retval)
- return retval;
- if (wake) {
- pci_prepare_to_sleep(pdev);
- } else {
- pci_wake_from_d3(pdev, false);
- pci_set_power_state(pdev, PCI_D3hot);
- }
+ device_set_wakeup_enable(dev_d, wake);
- return 0;
+ return retval;
}
-#endif /* CONFIG_PM */
static void ixgbe_shutdown(struct pci_dev *pdev)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
return ixgbe_xdp_setup(dev, xdp->prog);
- case XDP_QUERY_PROG:
- xdp->prog_id = adapter->xdp_prog ?
- adapter->xdp_prog->aux->id : 0;
- return 0;
case XDP_SETUP_XSK_UMEM:
return ixgbe_xsk_umem_setup(adapter, xdp->xsk.umem,
xdp->xsk.queue_id);
.resume = ixgbe_io_resume,
};
+static SIMPLE_DEV_PM_OPS(ixgbe_pm_ops, ixgbe_suspend, ixgbe_resume);
+
static struct pci_driver ixgbe_driver = {
- .name = ixgbe_driver_name,
- .id_table = ixgbe_pci_tbl,
- .probe = ixgbe_probe,
- .remove = ixgbe_remove,
-#ifdef CONFIG_PM
- .suspend = ixgbe_suspend,
- .resume = ixgbe_resume,
-#endif
- .shutdown = ixgbe_shutdown,
+ .name = ixgbe_driver_name,
+ .id_table = ixgbe_pci_tbl,
+ .probe = ixgbe_probe,
+ .remove = ixgbe_remove,
+ .driver.pm = &ixgbe_pm_ops,
+ .shutdown = ixgbe_shutdown,
.sriov_configure = ixgbe_pci_sriov_configure,
.err_handler = &ixgbe_err_handler
};
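/* Editorial note: SIMPLE_DEV_PM_OPS() only wires up .suspend/.resume when
 * CONFIG_PM_SLEEP is set, which is why ixgbe_suspend()/ixgbe_resume() are
 * tagged __maybe_unused instead of being wrapped in #ifdef CONFIG_PM.
 */
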
return 0;
}
-static int ixgbevf_suspend(struct pci_dev *pdev, pm_message_t state)
+static int __maybe_unused ixgbevf_suspend(struct device *dev_d)
{
- struct net_device *netdev = pci_get_drvdata(pdev);
+ struct net_device *netdev = dev_get_drvdata(dev_d);
struct ixgbevf_adapter *adapter = netdev_priv(netdev);
-#ifdef CONFIG_PM
- int retval = 0;
-#endif
rtnl_lock();
netif_device_detach(netdev);
ixgbevf_clear_interrupt_scheme(adapter);
rtnl_unlock();
-#ifdef CONFIG_PM
- retval = pci_save_state(pdev);
- if (retval)
- return retval;
-
-#endif
- if (!test_and_set_bit(__IXGBEVF_DISABLED, &adapter->state))
- pci_disable_device(pdev);
-
return 0;
}
-#ifdef CONFIG_PM
-static int ixgbevf_resume(struct pci_dev *pdev)
+static int __maybe_unused ixgbevf_resume(struct device *dev_d)
{
+ struct pci_dev *pdev = to_pci_dev(dev_d);
struct net_device *netdev = pci_get_drvdata(pdev);
struct ixgbevf_adapter *adapter = netdev_priv(netdev);
u32 err;
- pci_restore_state(pdev);
- /* pci_restore_state clears dev->state_saved so call
- * pci_save_state to restore it.
- */
- pci_save_state(pdev);
-
- err = pci_enable_device_mem(pdev);
- if (err) {
- dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n");
- return err;
- }
-
adapter->hw.hw_addr = adapter->io_addr;
smp_mb__before_atomic();
clear_bit(__IXGBEVF_DISABLED, &adapter->state);
return err;
}
-#endif /* CONFIG_PM */
static void ixgbevf_shutdown(struct pci_dev *pdev)
{
- ixgbevf_suspend(pdev, PMSG_SUSPEND);
+ ixgbevf_suspend(&pdev->dev);
}
static void ixgbevf_get_tx_ring_stats(struct rtnl_link_stats64 *stats,
static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
- struct ixgbevf_adapter *adapter = netdev_priv(dev);
-
switch (xdp->command) {
case XDP_SETUP_PROG:
return ixgbevf_xdp_setup(dev, xdp->prog);
- case XDP_QUERY_PROG:
- xdp->prog_id = adapter->xdp_prog ?
- adapter->xdp_prog->aux->id : 0;
- return 0;
default:
return -EINVAL;
}
.resume = ixgbevf_io_resume,
};
+static SIMPLE_DEV_PM_OPS(ixgbevf_pm_ops, ixgbevf_suspend, ixgbevf_resume);
+
static struct pci_driver ixgbevf_driver = {
.name = ixgbevf_driver_name,
.id_table = ixgbevf_pci_tbl,
.probe = ixgbevf_probe,
.remove = ixgbevf_remove,
-#ifdef CONFIG_PM
+
/* Power Management Hooks */
- .suspend = ixgbevf_suspend,
- .resume = ixgbevf_resume,
-#endif
+ .driver.pm = &ixgbevf_pm_ops,
+
.shutdown = ixgbevf_shutdown,
.err_handler = &ixgbevf_err_handler
};
phylink_start(pp->phylink);
- /* We may have called phy_speed_down before */
+ /* We may have called phylink_speed_down before */
phylink_speed_up(pp->phylink);
netif_tx_start_all_queues(pp->dev);
static int mvneta_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
- struct mvneta_port *pp = netdev_priv(dev);
-
switch (xdp->command) {
case XDP_SETUP_PROG:
return mvneta_xdp_setup(dev, xdp->prog, xdp->extack);
- case XDP_QUERY_PROG:
- xdp->prog_id = pp->xdp_prog ? pp->xdp_prog->aux->id : 0;
- return 0;
default:
return -EINVAL;
}
err = mvpp2_rx_refill(port, bm_pool, pp, pool);
if (err) {
netdev_err(port->dev, "failed to refill BM pools\n");
+ dev_kfree_skb_any(skb);
goto err_drop_frame;
}
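/* Editorial note: the skb for this frame has already been allocated by the
 * time the refill fails, so without the dev_kfree_skb_any() added above it
 * was simply leaked on the err_drop_frame path.
 */
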
switch (xdp->command) {
case XDP_SETUP_PROG:
return mvpp2_xdp_setup(port, xdp);
- case XDP_QUERY_PROG:
- xdp->prog_id = port->xdp_prog ? port->xdp_prog->aux->id : 0;
- return 0;
default:
return -EINVAL;
}
#include "en_tc.h"
#include "en_rep.h"
#include "en_accel/ipsec.h"
-#include "en_accel/ipsec_rxtx.h"
#include "en_accel/en_accel.h"
#include "en_accel/tls.h"
#include "accel/ipsec.h"
#include "en/hv_vhca_stats.h"
#include "en/devlink.h"
#include "lib/mlx5.h"
-#include "fpga/ipsec.h"
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
MLX5_SET(mkc, mkc, lw, 1);
MLX5_SET(mkc, mkc, lr, 1);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
-
+ mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
MLX5_SET64(mkc, mkc, len, npages << page_shift);
err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
&rq->wq_ctrl);
if (err)
- return err;
+ goto err_rq_wq_destroy;
rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
pool_size = MLX5_MPWRQ_PAGES_PER_WQE <<
mlx5e_mpwqe_get_log_rq_size(params, xsk);
- rq->post_wqes = mlx5e_post_rx_mpwqes;
- rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
-
- rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe;
-#ifdef CONFIG_MLX5_EN_IPSEC
- if (MLX5_IPSEC_DEV(mdev)) {
- err = -EINVAL;
- netdev_err(c->netdev, "MPWQE RQ with IPSec offload not supported\n");
- goto err_rq_wq_destroy;
- }
-#endif
- if (!rq->handle_rx_cqe) {
- err = -EINVAL;
- netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err);
- goto err_rq_wq_destroy;
- }
-
- rq->mpwqe.skb_from_cqe_mpwrq = xsk ?
- mlx5e_xsk_skb_from_cqe_mpwrq_linear :
- mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ?
- mlx5e_skb_from_cqe_mpwrq_linear :
- mlx5e_skb_from_cqe_mpwrq_nonlinear;
-
rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
rq->mpwqe.num_strides =
BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
&rq->wq_ctrl);
if (err)
- return err;
+ goto err_rq_wq_destroy;
rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
if (err)
goto err_free;
- rq->post_wqes = mlx5e_post_rx_wqes;
- rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
-
-#ifdef CONFIG_MLX5_EN_IPSEC
- if ((mlx5_fpga_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) &&
- c->priv->ipsec)
- rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe;
- else
-#endif
- rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe;
- if (!rq->handle_rx_cqe) {
- err = -EINVAL;
- netdev_err(c->netdev, "RX handler of RQ is not set, err %d\n", err);
- goto err_free;
- }
-
- rq->wqe.skb_from_cqe = xsk ?
- mlx5e_xsk_skb_from_cqe_linear :
- mlx5e_rx_is_linear_skb(params, NULL) ?
- mlx5e_skb_from_cqe_linear :
- mlx5e_skb_from_cqe_nonlinear;
rq->mkey_be = c->mkey_be;
}
+ err = mlx5e_rq_set_handlers(rq, params, xsk);
+ if (err)
+ goto err_free;
+
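/* Editorial note: mlx5e_rq_set_handlers() now centralizes what the two
 * deleted blocks above did inline - selecting rq->post_wqes,
 * rq->dealloc_wqe, rq->handle_rx_cqe and the skb_from_cqe variant based
 * on the RQ type and whether an XSK pool is in use.
 */
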
if (xsk) {
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
MEM_TYPE_XSK_BUFF_POOL, NULL);
priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE;
}
+static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev,
+ enum mlx5_port_status state)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ int vport_admin_state;
+
+ mlx5_set_port_admin_status(mdev, state);
+
+ if (!MLX5_ESWITCH_MANAGER(mdev) || mlx5_eswitch_mode(esw) == MLX5_ESWITCH_OFFLOADS)
+ return;
+
+ if (state == MLX5_PORT_UP)
+ vport_admin_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+ else
+ vport_admin_state = MLX5_VPORT_ADMIN_STATE_DOWN;
+
+ mlx5_eswitch_set_vport_state(esw, MLX5_VPORT_UPLINK, vport_admin_state);
+}
+
int mlx5e_open_locked(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
mutex_lock(&priv->state_lock);
err = mlx5e_open_locked(netdev);
if (!err)
- mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP);
+ mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_UP);
mutex_unlock(&priv->state_lock);
return err;
return -ENODEV;
mutex_lock(&priv->state_lock);
- mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_DOWN);
+ mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_DOWN);
err = mlx5e_close_locked(netdev);
mutex_unlock(&priv->state_lock);
}
#endif
-struct mlx5e_vxlan_work {
- struct work_struct work;
- struct mlx5e_priv *priv;
- u16 port;
-};
-
-static void mlx5e_vxlan_add_work(struct work_struct *work)
-{
- struct mlx5e_vxlan_work *vxlan_work =
- container_of(work, struct mlx5e_vxlan_work, work);
- struct mlx5e_priv *priv = vxlan_work->priv;
- u16 port = vxlan_work->port;
-
- mutex_lock(&priv->state_lock);
- mlx5_vxlan_add_port(priv->mdev->vxlan, port);
- mutex_unlock(&priv->state_lock);
-
- kfree(vxlan_work);
-}
-
-static void mlx5e_vxlan_del_work(struct work_struct *work)
-{
- struct mlx5e_vxlan_work *vxlan_work =
- container_of(work, struct mlx5e_vxlan_work, work);
- struct mlx5e_priv *priv = vxlan_work->priv;
- u16 port = vxlan_work->port;
-
- mutex_lock(&priv->state_lock);
- mlx5_vxlan_del_port(priv->mdev->vxlan, port);
- mutex_unlock(&priv->state_lock);
- kfree(vxlan_work);
-}
-
-static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add)
-{
- struct mlx5e_vxlan_work *vxlan_work;
-
- vxlan_work = kmalloc(sizeof(*vxlan_work), GFP_ATOMIC);
- if (!vxlan_work)
- return;
-
- if (add)
- INIT_WORK(&vxlan_work->work, mlx5e_vxlan_add_work);
- else
- INIT_WORK(&vxlan_work->work, mlx5e_vxlan_del_work);
-
- vxlan_work->priv = priv;
- vxlan_work->port = port;
- queue_work(priv->wq, &vxlan_work->work);
-}
-
-void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
- return;
-
- if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
- return;
-
- mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
-}
-
-void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
- return;
-
- if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
- return;
-
- mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 0);
-}
-
static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
struct sk_buff *skb,
netdev_features_t features)
return err;
}
- static u32 mlx5e_xdp_query(struct net_device *dev)
- {
- struct mlx5e_priv *priv = netdev_priv(dev);
- const struct bpf_prog *xdp_prog;
- u32 prog_id = 0;
-
- mutex_lock(&priv->state_lock);
- xdp_prog = priv->channels.params.xdp_prog;
- if (xdp_prog)
- prog_id = xdp_prog->aux->id;
- mutex_unlock(&priv->state_lock);
-
- return prog_id;
- }
-
static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
return mlx5e_xdp_set(dev, xdp->prog);
- case XDP_QUERY_PROG:
- xdp->prog_id = mlx5e_xdp_query(dev);
- return 0;
case XDP_SETUP_XSK_UMEM:
return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem,
xdp->xsk.queue_id);
.ndo_change_mtu = mlx5e_change_nic_mtu,
.ndo_do_ioctl = mlx5e_ioctl,
.ndo_set_tx_maxrate = mlx5e_set_tx_maxrate,
- .ndo_udp_tunnel_add = mlx5e_add_vxlan_port,
- .ndo_udp_tunnel_del = mlx5e_del_vxlan_port,
+ .ndo_udp_tunnel_add = udp_tunnel_nic_add_port,
+ .ndo_udp_tunnel_del = udp_tunnel_nic_del_port,
.ndo_features_check = mlx5e_features_check,
.ndo_tx_timeout = mlx5e_tx_timeout,
.ndo_bpf = mlx5e_xdp,
}
}
+static int mlx5e_vxlan_set_port(struct net_device *netdev, unsigned int table,
+ unsigned int entry, struct udp_tunnel_info *ti)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5_vxlan_add_port(priv->mdev->vxlan, ntohs(ti->port));
+}
+
+static int mlx5e_vxlan_unset_port(struct net_device *netdev, unsigned int table,
+ unsigned int entry, struct udp_tunnel_info *ti)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5_vxlan_del_port(priv->mdev->vxlan, ntohs(ti->port));
+}
+
+void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv)
+{
+ if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
+ return;
+
+ priv->nic_info.set_port = mlx5e_vxlan_set_port;
+ priv->nic_info.unset_port = mlx5e_vxlan_unset_port;
+ priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
+ UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN;
+ priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN;
+ /* Don't count the space hard-coded to the IANA port */
+ priv->nic_info.tables[0].n_entries =
+ mlx5_vxlan_max_udp_ports(priv->mdev) - 1;
+
+ priv->netdev->udp_tunnel_nic_info = &priv->nic_info;
+}
+
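/* Editorial note: UDP_TUNNEL_NIC_INFO_MAY_SLEEP makes the udp_tunnel_nic
 * core invoke set_port/unset_port from process context, so
 * mlx5_vxlan_add_port() is free to sleep; STATIC_IANA_VXLAN declares that
 * the IANA port 4789 is always offloaded and needs no table entry, which
 * is why it is excluded from n_entries above.
 */
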
static void mlx5e_build_nic_netdev(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX;
+ mlx5e_vxlan_set_netdev_info(priv);
+
if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) ||
mlx5e_any_tunnel_proto_supported(mdev)) {
netdev->hw_enc_features |= NETIF_F_HW_CSUM;
/* Marking the link as currently not needed by the Driver */
if (!netif_running(netdev))
- mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN);
+ mlx5e_modify_admin_state(mdev, MLX5_PORT_DOWN);
mlx5e_set_netdev_mtu_boundaries(priv);
mlx5e_set_dev_port_mtu(priv);
rtnl_lock();
if (netif_running(netdev))
mlx5e_open(netdev);
- if (mlx5_vxlan_allowed(priv->mdev->vxlan))
- udp_tunnel_get_rx_info(netdev);
+ udp_tunnel_nic_reset_ntf(priv->netdev);
netif_device_attach(netdev);
rtnl_unlock();
}
rtnl_lock();
if (netif_running(priv->netdev))
mlx5e_close(priv->netdev);
- if (mlx5_vxlan_allowed(priv->mdev->vxlan))
- udp_tunnel_drop_rx_info(priv->netdev);
netif_device_detach(priv->netdev);
rtnl_unlock();
.update_rx = mlx5e_update_nic_rx,
.update_stats = mlx5e_update_ndo_stats,
.update_carrier = mlx5e_update_carrier,
- .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe,
- .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .rx_handlers = &mlx5e_rx_handlers_nic,
.max_tc = MLX5E_MAX_NUM_TC,
.rq_groups = MLX5E_NUM_RQ_GROUPS(XSK),
.stats_grps = mlx5e_nic_stats_grps,
profile->cleanup_tx(priv);
out:
+ set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ cancel_work_sync(&priv->update_stats_work);
return err;
}
#include "efx.h"
#include "efx_common.h"
#include "efx_channels.h"
+#include "ef100.h"
#include "rx_common.h"
#include "tx_common.h"
#include "nic.h"
static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
struct efx_nic *efx = netdev_priv(dev);
- struct bpf_prog *xdp_prog;
switch (xdp->command) {
case XDP_SETUP_PROG:
return efx_xdp_setup_prog(efx, xdp->prog);
- case XDP_QUERY_PROG:
- xdp_prog = rtnl_dereference(efx->xdp_prog);
- xdp->prog_id = xdp_prog ? xdp_prog->aux->id : 0;
- return 0;
default:
return -EINVAL;
}
if (rc < 0)
goto err_pci;
+ rc = pci_register_driver(&ef100_pci_driver);
+ if (rc < 0)
+ goto err_pci_ef100;
+
return 0;
+ err_pci_ef100:
+ pci_unregister_driver(&efx_pci_driver);
err_pci:
efx_destroy_reset_workqueue();
err_reset:
{
printk(KERN_INFO "Solarflare NET driver unloading\n");
+ pci_unregister_driver(&ef100_pci_driver);
pci_unregister_driver(&efx_pci_driver);
efx_destroy_reset_workqueue();
#ifdef CONFIG_SFC_SRIOV
return 0;
}
- static u32 tun_xdp_query(struct net_device *dev)
- {
- struct tun_struct *tun = netdev_priv(dev);
- const struct bpf_prog *xdp_prog;
-
- xdp_prog = rtnl_dereference(tun->xdp_prog);
- if (xdp_prog)
- return xdp_prog->aux->id;
-
- return 0;
- }
-
static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
return tun_xdp_set(dev, xdp->prog, xdp->extack);
- case XDP_QUERY_PROG:
- xdp->prog_id = tun_xdp_query(dev);
- return 0;
default:
return -EINVAL;
}
return ret;
}
-static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p,
+static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog __rcu **prog_p,
void __user *data)
{
struct bpf_prog *prog;
*/
XDP_SETUP_PROG,
XDP_SETUP_PROG_HW,
- XDP_QUERY_PROG,
- XDP_QUERY_PROG_HW,
/* BPF program for offload callbacks, invoked at program load time. */
BPF_OFFLOAD_MAP_ALLOC,
BPF_OFFLOAD_MAP_FREE,
struct netlink_ext_ack;
struct xdp_umem;
struct xdp_dev_bulk_queue;
+ struct bpf_xdp_link;
+
+ enum bpf_xdp_mode {
+ XDP_MODE_SKB = 0,
+ XDP_MODE_DRV = 1,
+ XDP_MODE_HW = 2,
+ __MAX_XDP_MODE
+ };
+
+ struct bpf_xdp_entity {
+ struct bpf_prog *prog;
+ struct bpf_xdp_link *link;
+ };
struct netdev_bpf {
enum bpf_netdev_command command;
struct bpf_prog *prog;
struct netlink_ext_ack *extack;
};
- /* XDP_QUERY_PROG, XDP_QUERY_PROG_HW */
- struct {
- u32 prog_id;
- /* flags with which program was installed */
- u32 prog_flags;
- };
/* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */
struct {
struct bpf_offloaded_map *offmap;
struct timer_list watchdog_timer;
int watchdog_timeo;
+ u32 proto_down_reason;
+
struct list_head todo_list;
int __percpu *pcpu_refcnt;
#endif
const struct udp_tunnel_nic_info *udp_tunnel_nic_info;
struct udp_tunnel_nic *udp_tunnel_nic;
+
+ /* protected by rtnl_lock */
+ struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE];
};
#define to_net_dev(d) container_of(d, struct net_device, dev)
bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b);
int dev_change_proto_down(struct net_device *dev, bool proto_down);
int dev_change_proto_down_generic(struct net_device *dev, bool proto_down);
+void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
+ u32 value);
struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again);
struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, int *ret);
typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
int fd, int expected_fd, u32 flags);
- u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
- enum bpf_netdev_command cmd);
+ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+ u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
+
int xdp_umem_query(struct net_device *dev, u16 queue_id);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
btf_kind_str[BTF_INFO_KIND(t->info)]);
return false;
}
+
+ /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
+ for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
+ const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
+
+ if (ctx_arg_info->offset == off &&
+ (ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL ||
+ ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) {
+ info->reg_type = ctx_arg_info->reg_type;
+ return true;
+ }
+ }
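/* e.g. the bpf_map_elem iterator registers ctx.key as
 * PTR_TO_RDONLY_BUF_OR_NULL and ctx.value as PTR_TO_RDWR_BUF_OR_NULL,
 * which is exactly what the loop above recognizes (editorial note).
 */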
+
if (t->type == 0)
/* This is a pointer to void.
* It is the same as scalar from the verifier safety pov.
{
int id;
- if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID)
+ if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID || !btf_vmlinux)
return -EINVAL;
id = fn->btf_id[arg];
if (!id || id > btf_vmlinux->nr_types)
htab_elem_free(htab, l);
}
-static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
+static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
{
struct bpf_map *map = &htab->map;
+ void *ptr;
if (map->ops->map_fd_put_ptr) {
- void *ptr = fd_htab_map_get_ptr(map, l);
-
+ ptr = fd_htab_map_get_ptr(map, l);
map->ops->map_fd_put_ptr(ptr);
}
+}
+
+static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
+{
+ htab_put_fd_value(htab, l);
if (htab_is_prealloc(htab)) {
__pcpu_freelist_push(&htab->freelist, &l->fnode);
*/
pl_new = this_cpu_ptr(htab->extra_elems);
l_new = *pl_new;
+ htab_put_fd_value(htab, old_elem);
*pl_new = old_elem;
} else {
struct pcpu_freelist_node *l;
true, false);
}
+ struct bpf_iter_seq_hash_map_info {
+ struct bpf_map *map;
+ struct bpf_htab *htab;
+ void *percpu_value_buf; // non-zero means percpu hash
+ unsigned long flags;
+ u32 bucket_id;
+ u32 skip_elems;
+ };
+
+ static struct htab_elem *
+ bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
+ struct htab_elem *prev_elem)
+ {
+ const struct bpf_htab *htab = info->htab;
+ unsigned long flags = info->flags;
+ u32 skip_elems = info->skip_elems;
+ u32 bucket_id = info->bucket_id;
+ struct hlist_nulls_head *head;
+ struct hlist_nulls_node *n;
+ struct htab_elem *elem;
+ struct bucket *b;
+ u32 i, count;
+
+ if (bucket_id >= htab->n_buckets)
+ return NULL;
+
+ /* try to find next elem in the same bucket */
+ if (prev_elem) {
+ /* no update/deletion on this bucket, prev_elem should be still valid
+ * and we won't skip elements.
+ */
+ n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node));
+ elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node);
+ if (elem)
+ return elem;
+
+ /* not found, unlock and go to the next bucket */
+ b = &htab->buckets[bucket_id++];
+ htab_unlock_bucket(htab, b, flags);
+ skip_elems = 0;
+ }
+
+ for (i = bucket_id; i < htab->n_buckets; i++) {
+ b = &htab->buckets[i];
+ flags = htab_lock_bucket(htab, b);
+
+ count = 0;
+ head = &b->head;
+ hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
+ if (count >= skip_elems) {
+ info->flags = flags;
+ info->bucket_id = i;
+ info->skip_elems = count;
+ return elem;
+ }
+ count++;
+ }
+
+ htab_unlock_bucket(htab, b, flags);
+ skip_elems = 0;
+ }
+
+ info->bucket_id = i;
+ info->skip_elems = 0;
+ return NULL;
+ }
+
+ static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos)
+ {
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+ struct htab_elem *elem;
+
+ elem = bpf_hash_map_seq_find_next(info, NULL);
+ if (!elem)
+ return NULL;
+
+ if (*pos == 0)
+ ++*pos;
+ return elem;
+ }
+
+ static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+
+ ++*pos;
+ ++info->skip_elems;
+ return bpf_hash_map_seq_find_next(info, v);
+ }
+
+ static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
+ {
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+ u32 roundup_key_size, roundup_value_size;
+ struct bpf_iter__bpf_map_elem ctx = {};
+ struct bpf_map *map = info->map;
+ struct bpf_iter_meta meta;
+ int ret = 0, off = 0, cpu;
+ struct bpf_prog *prog;
+ void __percpu *pptr;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, elem == NULL);
+ if (prog) {
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (elem) {
+ roundup_key_size = round_up(map->key_size, 8);
+ ctx.key = elem->key;
+ if (!info->percpu_value_buf) {
+ ctx.value = elem->key + roundup_key_size;
+ } else {
+ roundup_value_size = round_up(map->value_size, 8);
+ pptr = htab_elem_get_ptr(elem, map->key_size);
+ for_each_possible_cpu(cpu) {
+ bpf_long_memcpy(info->percpu_value_buf + off,
+ per_cpu_ptr(pptr, cpu),
+ roundup_value_size);
+ off += roundup_value_size;
+ }
+ ctx.value = info->percpu_value_buf;
+ }
+ }
+ ret = bpf_iter_run_prog(prog, &ctx);
+ }
+
+ return ret;
+ }
+
+ static int bpf_hash_map_seq_show(struct seq_file *seq, void *v)
+ {
+ return __bpf_hash_map_seq_show(seq, v);
+ }
+
+ static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v)
+ {
+ struct bpf_iter_seq_hash_map_info *info = seq->private;
+
+ if (!v)
+ (void)__bpf_hash_map_seq_show(seq, NULL);
+ else
+ htab_unlock_bucket(info->htab,
+ &info->htab->buckets[info->bucket_id],
+ info->flags);
+ }
+
+ static int bpf_iter_init_hash_map(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+ {
+ struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
+ struct bpf_map *map = aux->map;
+ void *value_buf;
+ u32 buf_size;
+
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+ buf_size = round_up(map->value_size, 8) * num_possible_cpus();
+ value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
+ if (!value_buf)
+ return -ENOMEM;
+
+ seq_info->percpu_value_buf = value_buf;
+ }
+
+ seq_info->map = map;
+ seq_info->htab = container_of(map, struct bpf_htab, map);
+ return 0;
+ }
+
+ static void bpf_iter_fini_hash_map(void *priv_data)
+ {
+ struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
+
+ kfree(seq_info->percpu_value_buf);
+ }
+
+ static const struct seq_operations bpf_hash_map_seq_ops = {
+ .start = bpf_hash_map_seq_start,
+ .next = bpf_hash_map_seq_next,
+ .stop = bpf_hash_map_seq_stop,
+ .show = bpf_hash_map_seq_show,
+ };
+
+ static const struct bpf_iter_seq_info iter_seq_info = {
+ .seq_ops = &bpf_hash_map_seq_ops,
+ .init_seq_private = bpf_iter_init_hash_map,
+ .fini_seq_private = bpf_iter_fini_hash_map,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
+ };
+
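/* Editorial note: bpf_hash_map_seq_find_next() returns with the current
 * bucket's lock held (saved in info->flags); the lock is dropped either
 * when the walk advances past that bucket or in bpf_hash_map_seq_stop(),
 * so the iterator program always sees a consistent bucket.
 */
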
static int htab_map_btf_id;
const struct bpf_map_ops htab_map_ops = {
.map_alloc_check = htab_map_alloc_check,
BATCH_OPS(htab),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
static int htab_lru_map_btf_id;
BATCH_OPS(htab_lru),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_lru_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
/* Called from eBPF program */
BATCH_OPS(htab_percpu),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_percpu_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
static int htab_lru_percpu_map_btf_id;
BATCH_OPS(htab_lru_percpu),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_lru_percpu_map_btf_id,
+ .iter_seq_info = &iter_seq_info,
};
static int fd_htab_map_alloc_check(union bpf_attr *attr)
static netdev_features_t harmonize_features(struct sk_buff *skb,
netdev_features_t features)
{
- int tmp;
__be16 type;
- type = skb_network_protocol(skb, &tmp);
+ type = skb_network_protocol(skb, NULL);
features = net_mpls_features(skb, features, type);
if (skb->ip_summed != CHECKSUM_NONE &&
}
break;
- case XDP_QUERY_PROG:
- xdp->prog_id = old ? old->aux->id : 0;
- break;
-
default:
ret = -EINVAL;
break;
}
EXPORT_SYMBOL(dev_change_proto_down_generic);
- u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
- enum bpf_netdev_command cmd)
+/**
+ * dev_change_proto_down_reason - update the proto_down reason bits
+ *
+ * @dev: device
+ * @mask: bitmask of reason bits to update; 0 means replace the whole value
+ * @value: new value for the bits selected by @mask
+ */
+void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
+ u32 value)
+{
+ int b;
+
+ if (!mask) {
+ dev->proto_down_reason = value;
+ } else {
+ for_each_set_bit(b, &mask, 32) {
+ if (value & (1 << b))
+ dev->proto_down_reason |= BIT(b);
+ else
+ dev->proto_down_reason &= ~BIT(b);
+ }
+ }
+}
+EXPORT_SYMBOL(dev_change_proto_down_reason);
+
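/* Worked example (editorial): with proto_down_reason == 0x5,
 *
 *	dev_change_proto_down_reason(dev, BIT(0), 0); // clears bit 0 -> 0x4
 *	dev_change_proto_down_reason(dev, 0, 0x2);    // zero mask replaces -> 0x2
 */
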
+ struct bpf_xdp_link {
+ struct bpf_link link;
+ struct net_device *dev; /* protected by rtnl_lock, no refcnt held */
+ int flags;
+ };
+
+ static enum bpf_xdp_mode dev_xdp_mode(u32 flags)
{
- struct netdev_bpf xdp;
+ if (flags & XDP_FLAGS_HW_MODE)
+ return XDP_MODE_HW;
+ if (flags & XDP_FLAGS_DRV_MODE)
+ return XDP_MODE_DRV;
+ return XDP_MODE_SKB;
+ }
- if (!bpf_op)
- return 0;
+ static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode)
+ {
+ switch (mode) {
+ case XDP_MODE_SKB:
+ return generic_xdp_install;
+ case XDP_MODE_DRV:
+ case XDP_MODE_HW:
+ return dev->netdev_ops->ndo_bpf;
+ default:
+ return NULL;
+ }
+ }
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = cmd;
+ static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev,
+ enum bpf_xdp_mode mode)
+ {
+ return dev->xdp_state[mode].link;
+ }
+
+ static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
+ enum bpf_xdp_mode mode)
+ {
+ struct bpf_xdp_link *link = dev_xdp_link(dev, mode);
+
+ if (link)
+ return link->link.prog;
+ return dev->xdp_state[mode].prog;
+ }
+
+ u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
+ {
+ struct bpf_prog *prog = dev_xdp_prog(dev, mode);
- /* Query must always succeed. */
- WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG);
+ return prog ? prog->aux->id : 0;
+ }
- return xdp.prog_id;
+ static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode,
+ struct bpf_xdp_link *link)
+ {
+ dev->xdp_state[mode].link = link;
+ dev->xdp_state[mode].prog = NULL;
}
- static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
- struct netlink_ext_ack *extack, u32 flags,
- struct bpf_prog *prog)
+ static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode,
+ struct bpf_prog *prog)
+ {
+ dev->xdp_state[mode].link = NULL;
+ dev->xdp_state[mode].prog = prog;
+ }
+
+ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
+ bpf_op_t bpf_op, struct netlink_ext_ack *extack,
+ u32 flags, struct bpf_prog *prog)
{
- bool non_hw = !(flags & XDP_FLAGS_HW_MODE);
- struct bpf_prog *prev_prog = NULL;
struct netdev_bpf xdp;
int err;
- if (non_hw) {
- prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op,
- XDP_QUERY_PROG));
- if (IS_ERR(prev_prog))
- prev_prog = NULL;
- }
-
memset(&xdp, 0, sizeof(xdp));
- if (flags & XDP_FLAGS_HW_MODE)
- xdp.command = XDP_SETUP_PROG_HW;
- else
- xdp.command = XDP_SETUP_PROG;
+ xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG;
xdp.extack = extack;
xdp.flags = flags;
xdp.prog = prog;
+ /* Drivers assume refcnt is already incremented (i.e, prog pointer is
+ * "moved" into driver), so they don't increment it on their own, but
+ * they do decrement refcnt when program is detached or replaced.
+ * Given net_device also owns link/prog, we need to bump refcnt here
+ * to prevent drivers from underflowing it.
+ */
+ if (prog)
+ bpf_prog_inc(prog);
err = bpf_op(dev, &xdp);
- if (!err && non_hw)
- bpf_prog_change_xdp(prev_prog, prog);
+ if (err) {
+ if (prog)
+ bpf_prog_put(prog);
+ return err;
+ }
- if (prev_prog)
- bpf_prog_put(prev_prog);
+ if (mode != XDP_MODE_HW)
+ bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog);
- return err;
+ return 0;
}
static void dev_xdp_uninstall(struct net_device *dev)
{
- struct netdev_bpf xdp;
- bpf_op_t ndo_bpf;
+ struct bpf_xdp_link *link;
+ struct bpf_prog *prog;
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
- /* Remove generic XDP */
- WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL));
+ ASSERT_RTNL();
- /* Remove from the driver */
- ndo_bpf = dev->netdev_ops->ndo_bpf;
- if (!ndo_bpf)
- return;
+ for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) {
+ prog = dev_xdp_prog(dev, mode);
+ if (!prog)
+ continue;
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG;
- WARN_ON(ndo_bpf(dev, &xdp));
- if (xdp.prog_id)
- WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
- NULL));
+ bpf_op = dev_xdp_bpf_op(dev, mode);
+ if (!bpf_op)
+ continue;
- /* Remove HW offload */
- memset(&xdp, 0, sizeof(xdp));
- xdp.command = XDP_QUERY_PROG_HW;
- if (!ndo_bpf(dev, &xdp) && xdp.prog_id)
- WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
- NULL));
+ WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
+
+ /* auto-detach link from net device */
+ link = dev_xdp_link(dev, mode);
+ if (link)
+ link->dev = NULL;
+ else
+ bpf_prog_put(prog);
+
+ dev_xdp_set_link(dev, mode, NULL);
+ }
}
- /**
- * dev_change_xdp_fd - set or clear a bpf program for a device rx path
- * @dev: device
- * @extack: netlink extended ack
- * @fd: new program fd or negative value to clear
- * @expected_fd: old program fd that userspace expects to replace or clear
- * @flags: xdp-related flags
- *
- * Set or clear a bpf program for a device
- */
- int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
- int fd, int expected_fd, u32 flags)
+ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack,
+ struct bpf_xdp_link *link, struct bpf_prog *new_prog,
+ struct bpf_prog *old_prog, u32 flags)
{
- const struct net_device_ops *ops = dev->netdev_ops;
- enum bpf_netdev_command query;
- u32 prog_id, expected_id = 0;
- bpf_op_t bpf_op, bpf_chk;
- struct bpf_prog *prog;
- bool offload;
+ struct bpf_prog *cur_prog;
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
int err;
ASSERT_RTNL();
- offload = flags & XDP_FLAGS_HW_MODE;
- query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
+ /* either link or prog attachment, never both */
+ if (link && (new_prog || old_prog))
+ return -EINVAL;
+ /* link supports only XDP mode flags */
+ if (link && (flags & ~XDP_FLAGS_MODES)) {
+ NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment");
+ return -EINVAL;
+ }
+ /* just one XDP mode bit should be set, zero defaults to SKB mode */
+ if (hweight32(flags & XDP_FLAGS_MODES) > 1) {
+ NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set");
+ return -EINVAL;
+ }
+ /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */
+ if (old_prog && !(flags & XDP_FLAGS_REPLACE)) {
+ NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified");
+ return -EINVAL;
+ }
- bpf_op = bpf_chk = ops->ndo_bpf;
- if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) {
- NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode");
- return -EOPNOTSUPP;
+ mode = dev_xdp_mode(flags);
+ /* can't replace attached link */
+ if (dev_xdp_link(dev, mode)) {
+ NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link");
+ return -EBUSY;
}
- if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
- bpf_op = generic_xdp_install;
- if (bpf_op == bpf_chk)
- bpf_chk = generic_xdp_install;
-
- prog_id = __dev_xdp_query(dev, bpf_op, query);
- if (flags & XDP_FLAGS_REPLACE) {
- if (expected_fd >= 0) {
- prog = bpf_prog_get_type_dev(expected_fd,
- BPF_PROG_TYPE_XDP,
- bpf_op == ops->ndo_bpf);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
- expected_id = prog->aux->id;
- bpf_prog_put(prog);
- }
- if (prog_id != expected_id) {
- NL_SET_ERR_MSG(extack, "Active program does not match expected");
- return -EEXIST;
- }
+ cur_prog = dev_xdp_prog(dev, mode);
+ /* can't replace attached prog with link */
+ if (link && cur_prog) {
+ NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link");
+ return -EBUSY;
+ }
+ if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) {
+ NL_SET_ERR_MSG(extack, "Active program does not match expected");
+ return -EEXIST;
+ }
+ if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) {
+ NL_SET_ERR_MSG(extack, "XDP program already attached");
+ return -EBUSY;
}
- if (fd >= 0) {
- if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) {
- NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time");
- return -EEXIST;
- }
- if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) {
- NL_SET_ERR_MSG(extack, "XDP program already attached");
- return -EBUSY;
- }
+ /* put effective new program into new_prog */
+ if (link)
+ new_prog = link->link.prog;
- prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
- bpf_op == ops->ndo_bpf);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
+ if (new_prog) {
+ bool offload = mode == XDP_MODE_HW;
+ enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB
+ ? XDP_MODE_DRV : XDP_MODE_SKB;
- if (!offload && bpf_prog_is_dev_bound(prog->aux)) {
- NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported");
- bpf_prog_put(prog);
+ if (!offload && dev_xdp_prog(dev, other_mode)) {
+ NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time");
+ return -EEXIST;
+ }
+ if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) {
+ NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported");
return -EINVAL;
}
-
- if (prog->expected_attach_type == BPF_XDP_DEVMAP) {
+ if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
- bpf_prog_put(prog);
return -EINVAL;
}
-
- if (prog->expected_attach_type == BPF_XDP_CPUMAP) {
- NL_SET_ERR_MSG(extack,
- "BPF_XDP_CPUMAP programs can not be attached to a device");
- bpf_prog_put(prog);
+ if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) {
+ NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device");
return -EINVAL;
}
+ }
- /* prog->aux->id may be 0 for orphaned device-bound progs */
- if (prog->aux->id && prog->aux->id == prog_id) {
- bpf_prog_put(prog);
- return 0;
+ /* don't call drivers if the effective program didn't change */
+ if (new_prog != cur_prog) {
+ bpf_op = dev_xdp_bpf_op(dev, mode);
+ if (!bpf_op) {
+ NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode");
+ return -EOPNOTSUPP;
+ }
+
+ err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog);
+ if (err)
+ return err;
+ }
+
+ if (link)
+ dev_xdp_set_link(dev, mode, link);
+ else
+ dev_xdp_set_prog(dev, mode, new_prog);
+ if (cur_prog)
+ bpf_prog_put(cur_prog);
+
+ return 0;
+ }
+
+ static int dev_xdp_attach_link(struct net_device *dev,
+ struct netlink_ext_ack *extack,
+ struct bpf_xdp_link *link)
+ {
+ return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags);
+ }
+
+ static int dev_xdp_detach_link(struct net_device *dev,
+ struct netlink_ext_ack *extack,
+ struct bpf_xdp_link *link)
+ {
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
+
+ ASSERT_RTNL();
+
+ mode = dev_xdp_mode(link->flags);
+ if (dev_xdp_link(dev, mode) != link)
+ return -EINVAL;
+
+ bpf_op = dev_xdp_bpf_op(dev, mode);
+ WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
+ dev_xdp_set_link(dev, mode, NULL);
+ return 0;
+ }
+
+ static void bpf_xdp_link_release(struct bpf_link *link)
+ {
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+
+ rtnl_lock();
+
+ /* if racing with net_device's tear down, xdp_link->dev might be
+ * already NULL, in which case link was already auto-detached
+ */
+ if (xdp_link->dev) {
+ WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link));
+ xdp_link->dev = NULL;
+ }
+
+ rtnl_unlock();
+ }
+
+ static int bpf_xdp_link_detach(struct bpf_link *link)
+ {
+ bpf_xdp_link_release(link);
+ return 0;
+ }
+
+ static void bpf_xdp_link_dealloc(struct bpf_link *link)
+ {
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+
+ kfree(xdp_link);
+ }
+
+ static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link,
+ struct seq_file *seq)
+ {
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+ u32 ifindex = 0;
+
+ rtnl_lock();
+ if (xdp_link->dev)
+ ifindex = xdp_link->dev->ifindex;
+ rtnl_unlock();
+
+ seq_printf(seq, "ifindex:\t%u\n", ifindex);
+ }
+
+ static int bpf_xdp_link_fill_link_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+ {
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+ u32 ifindex = 0;
+
+ rtnl_lock();
+ if (xdp_link->dev)
+ ifindex = xdp_link->dev->ifindex;
+ rtnl_unlock();
+
+ info->xdp.ifindex = ifindex;
+ return 0;
+ }
+
+ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
+ struct bpf_prog *old_prog)
+ {
+ struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
+ enum bpf_xdp_mode mode;
+ bpf_op_t bpf_op;
+ int err = 0;
+
+ rtnl_lock();
+
+ /* link might have been auto-released already, so fail */
+ if (!xdp_link->dev) {
+ err = -ENOLINK;
+ goto out_unlock;
+ }
+
+ if (old_prog && link->prog != old_prog) {
+ err = -EPERM;
+ goto out_unlock;
+ }
+ old_prog = link->prog;
+ if (old_prog == new_prog) {
+ /* no-op, don't disturb drivers */
+ bpf_prog_put(new_prog);
+ goto out_unlock;
+ }
+
+ mode = dev_xdp_mode(xdp_link->flags);
+ bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode);
+ err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL,
+ xdp_link->flags, new_prog);
+ if (err)
+ goto out_unlock;
+
+ old_prog = xchg(&link->prog, new_prog);
+ bpf_prog_put(old_prog);
+
+ out_unlock:
+ rtnl_unlock();
+ return err;
+ }
+
+ static const struct bpf_link_ops bpf_xdp_link_lops = {
+ .release = bpf_xdp_link_release,
+ .dealloc = bpf_xdp_link_dealloc,
+ .detach = bpf_xdp_link_detach,
+ .show_fdinfo = bpf_xdp_link_show_fdinfo,
+ .fill_link_info = bpf_xdp_link_fill_link_info,
+ .update_prog = bpf_xdp_link_update,
+ };
+
+ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+ {
+ struct net *net = current->nsproxy->net_ns;
+ struct bpf_link_primer link_primer;
+ struct bpf_xdp_link *link;
+ struct net_device *dev;
+ int err, fd;
+
+ dev = dev_get_by_index(net, attr->link_create.target_ifindex);
+ if (!dev)
+ return -EINVAL;
+
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link) {
+ err = -ENOMEM;
+ goto out_put_dev;
+ }
+
+ bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
+ link->dev = dev;
+ link->flags = attr->link_create.flags;
+
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err) {
+ kfree(link);
+ goto out_put_dev;
+ }
+
+ rtnl_lock();
+ err = dev_xdp_attach_link(dev, NULL, link);
+ rtnl_unlock();
+
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ goto out_put_dev;
+ }
+
+ fd = bpf_link_settle(&link_primer);
+ /* link itself doesn't hold dev's refcnt to not complicate shutdown */
+ dev_put(dev);
+ return fd;
+
+ out_put_dev:
+ dev_put(dev);
+ return err;
+ }
+
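/* Userspace sketch (editorial, assumes libbpf's bpf_link_create() and the
 * BPF_XDP attach type introduced alongside this API):
 *
 *	int link_fd = bpf_link_create(prog_fd, ifindex, BPF_XDP, NULL);
 *
 * The returned fd keeps the program attached until it is closed or
 * explicitly detached; the net_device itself is not pinned (note the
 * dev_put() above).
 */
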
+ /**
+ * dev_change_xdp_fd - set or clear a bpf program for a device rx path
+ * @dev: device
+ * @extack: netlink extended ack
+ * @fd: new program fd or negative value to clear
+ * @expected_fd: old program fd that userspace expects to replace or clear
+ * @flags: xdp-related flags
+ *
+ * Set or clear a bpf program for a device
+ */
+ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
+ int fd, int expected_fd, u32 flags)
+ {
+ enum bpf_xdp_mode mode = dev_xdp_mode(flags);
+ struct bpf_prog *new_prog = NULL, *old_prog = NULL;
+ int err;
+
+ ASSERT_RTNL();
+
+ if (fd >= 0) {
+ new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
+ mode != XDP_MODE_SKB);
+ if (IS_ERR(new_prog))
+ return PTR_ERR(new_prog);
+ }
+
+ if (expected_fd >= 0) {
+ old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP,
+ mode != XDP_MODE_SKB);
+ if (IS_ERR(old_prog)) {
+ err = PTR_ERR(old_prog);
+ old_prog = NULL;
+ goto err_out;
}
- } else {
- if (!prog_id)
- return 0;
- prog = NULL;
}
- err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
- if (err < 0 && prog)
- bpf_prog_put(prog);
+ err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags);
+ err_out:
+ if (err && new_prog)
+ bpf_prog_put(new_prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
return err;
}
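/* Editorial example: atomically replace prog A with prog B in native mode,
 * failing with -EEXIST if A is no longer the active program:
 *
 *	err = dev_change_xdp_fd(dev, extack, fd_b, fd_a,
 *				XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE);
 */
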
return size;
}
+static size_t rtnl_proto_down_size(const struct net_device *dev)
+{
+ size_t size = nla_total_size(1);
+
+ if (dev->proto_down_reason)
+ size += nla_total_size(0) + nla_total_size(4);
+
+ return size;
+}
+
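/* Editorial note: nla_total_size(0) above accounts for the
 * IFLA_PROTO_DOWN_REASON nest header and nla_total_size(4) for the
 * IFLA_PROTO_DOWN_REASON_VALUE u32 that rtnl_fill_proto_down() emits.
 */
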
static noinline size_t if_nlmsg_size(const struct net_device *dev,
u32 ext_filter_mask)
{
+ nla_total_size(4) /* IFLA_EVENT */
+ nla_total_size(4) /* IFLA_NEW_NETNSID */
+ nla_total_size(4) /* IFLA_NEW_IFINDEX */
- + nla_total_size(1) /* IFLA_PROTO_DOWN */
+ + rtnl_proto_down_size(dev) /* proto down */
+ nla_total_size(4) /* IFLA_TARGET_NETNSID */
+ nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */
+ nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */
static u32 rtnl_xdp_prog_drv(struct net_device *dev)
{
- return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
+ return dev_xdp_prog_id(dev, XDP_MODE_DRV);
}
static u32 rtnl_xdp_prog_hw(struct net_device *dev)
{
- return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
- XDP_QUERY_PROG_HW);
+ return dev_xdp_prog_id(dev, XDP_MODE_HW);
}
static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
return ret;
}
+static int rtnl_fill_proto_down(struct sk_buff *skb,
+ const struct net_device *dev)
+{
+ struct nlattr *pr;
+ u32 preason;
+
+ if (nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
+ goto nla_put_failure;
+
+ preason = dev->proto_down_reason;
+ if (!preason)
+ return 0;
+
+ pr = nla_nest_start(skb, IFLA_PROTO_DOWN_REASON);
+ if (!pr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, IFLA_PROTO_DOWN_REASON_VALUE, preason)) {
+ nla_nest_cancel(skb, pr);
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(skb, pr);
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb,
struct net_device *dev, struct net *src_net,
int type, u32 pid, u32 seq, u32 change,
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_up_count) +
atomic_read(&dev->carrier_down_count)) ||
- nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down) ||
nla_put_u32(skb, IFLA_CARRIER_UP_COUNT,
atomic_read(&dev->carrier_up_count)) ||
nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT,
atomic_read(&dev->carrier_down_count)))
goto nla_put_failure;
+ if (rtnl_fill_proto_down(skb, dev))
+ goto nla_put_failure;
+
if (event != IFLA_EVENT_NONE) {
if (nla_put_u32(skb, IFLA_EVENT, event))
goto nla_put_failure;
[IFLA_ALT_IFNAME] = { .type = NLA_STRING,
.len = ALTIFNAMSIZ - 1 },
[IFLA_PERM_ADDRESS] = { .type = NLA_REJECT },
+ [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
return 0;
}
+static const struct nla_policy ifla_proto_down_reason_policy[IFLA_PROTO_DOWN_REASON_VALUE + 1] = {
+ [IFLA_PROTO_DOWN_REASON_MASK] = { .type = NLA_U32 },
+ [IFLA_PROTO_DOWN_REASON_VALUE] = { .type = NLA_U32 },
+};
+
+static int do_set_proto_down(struct net_device *dev,
+ struct nlattr *nl_proto_down,
+ struct nlattr *nl_proto_down_reason,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *pdreason[IFLA_PROTO_DOWN_REASON_MAX + 1];
+ const struct net_device_ops *ops = dev->netdev_ops;
+ unsigned long mask = 0;
+ u32 value;
+ bool proto_down;
+ int err;
+
+ if (!ops->ndo_change_proto_down) {
+ NL_SET_ERR_MSG(extack, "Protodown not supported by device");
+ return -EOPNOTSUPP;
+ }
+
+ if (nl_proto_down_reason) {
+ err = nla_parse_nested_deprecated(pdreason,
+ IFLA_PROTO_DOWN_REASON_MAX,
+ nl_proto_down_reason,
+ ifla_proto_down_reason_policy,
+ NULL);
+ if (err < 0)
+ return err;
+
+ if (!pdreason[IFLA_PROTO_DOWN_REASON_VALUE]) {
+ NL_SET_ERR_MSG(extack, "Invalid protodown reason value");
+ return -EINVAL;
+ }
+
+ value = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_VALUE]);
+
+ if (pdreason[IFLA_PROTO_DOWN_REASON_MASK])
+ mask = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_MASK]);
+
+ dev_change_proto_down_reason(dev, mask, value);
+ }
+
+ if (nl_proto_down) {
+ proto_down = nla_get_u8(nl_proto_down);
+
+ /* Don't turn off protodown if there are active reasons */
+ if (!proto_down && dev->proto_down_reason) {
+ NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons");
+ return -EBUSY;
+ }
+ err = dev_change_proto_down(dev, proto_down);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
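/* Editorial sketch of the attribute layout do_set_proto_down() consumes:
 *
 *	IFLA_PROTO_DOWN (u8)                   - optional on/off toggle
 *	IFLA_PROTO_DOWN_REASON (nested)
 *	    IFLA_PROTO_DOWN_REASON_MASK (u32)  - optional, bits to modify
 *	    IFLA_PROTO_DOWN_REASON_VALUE (u32) - required inside the nest
 */
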
#define DO_SETLINK_MODIFIED 0x01
/* notify flag means notify + modified. */
#define DO_SETLINK_NOTIFY 0x03
}
err = 0;
- if (tb[IFLA_PROTO_DOWN]) {
- err = dev_change_proto_down(dev,
- nla_get_u8(tb[IFLA_PROTO_DOWN]));
+ if (tb[IFLA_PROTO_DOWN] || tb[IFLA_PROTO_DOWN_REASON]) {
+ err = do_set_proto_down(dev, tb[IFLA_PROTO_DOWN],
+ tb[IFLA_PROTO_DOWN_REASON], extack);
if (err)
goto errout;
status |= DO_SETLINK_NOTIFY;
rt->fib6_src.plen = cfg->fc_src_len;
#endif
if (nh) {
- if (!nexthop_get(nh)) {
- NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
- goto out;
- }
if (rt->fib6_src.plen) {
NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
goto out;
}
+ if (!nexthop_get(nh)) {
+ NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ goto out;
+ }
rt->nh = nh;
fib6_nh = nexthop_fib6_nh(rt->nh);
} else {
BTF_ID_LIST(btf_fib6_info_id)
BTF_ID(struct, fib6_info)
- static struct bpf_iter_reg ipv6_route_reg_info = {
- .target = "ipv6_route",
+ static const struct bpf_iter_seq_info ipv6_route_seq_info = {
.seq_ops = &ipv6_route_seq_ops,
.init_seq_private = bpf_iter_init_seq_net,
.fini_seq_private = bpf_iter_fini_seq_net,
.seq_priv_size = sizeof(struct ipv6_route_iter),
+ };
+
+ static struct bpf_iter_reg ipv6_route_reg_info = {
+ .target = "ipv6_route",
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__ipv6_route, rt),
PTR_TO_BTF_ID_OR_NULL },
},
+ .seq_info = &ipv6_route_seq_info,
};
static int __init bpf_iter_register(void)