1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * net/switchdev/switchdev.c - Switch device API
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/init.h>
11 #include <linux/mutex.h>
12 #include <linux/notifier.h>
13 #include <linux/netdevice.h>
14 #include <linux/etherdevice.h>
15 #include <linux/if_bridge.h>
16 #include <linux/list.h>
17 #include <linux/workqueue.h>
18 #include <linux/if_vlan.h>
19 #include <linux/rtnetlink.h>
20 #include <net/switchdev.h>
/*
 * Queue of pending deferred operations. The enqueue and dequeue helpers in
 * this file manipulate the list with deferred_lock held (spin_lock_bh).
 */
22 static LIST_HEAD(deferred);
23 static DEFINE_SPINLOCK(deferred_lock);
/*
 * Callback type for a queued operation. switchdev_deferred_process() invokes
 * it with the item's device and the item's stored data.
 */
25 typedef void switchdev_deferred_func_t(struct net_device *dev,
/* One entry on the deferred list. */
28 struct switchdev_deferred_item {
29 struct list_head list; /* linkage on the "deferred" list */
30 struct net_device *dev; /* device handed to func when the item is processed */
31 netdevice_tracker dev_tracker; /* tracker for the reference taken with dev_hold_track() at enqueue time */
32 switchdev_deferred_func_t *func; /* operation to run when the queue is processed */
/*
 * Detach the first item of the deferred list, with deferred_lock held for the
 * list access. Items are appended with list_add_tail() on enqueue, so the
 * first entry is the oldest one. The list_empty() test covers the case where
 * nothing is queued; switchdev_deferred_process() loops until this helper
 * yields a false (NULL) result.
 */
36 static struct switchdev_deferred_item *switchdev_deferred_dequeue(void)
38 struct switchdev_deferred_item *dfitem;
40 spin_lock_bh(&deferred_lock);
41 if (list_empty(&deferred)) {
45 dfitem = list_first_entry(&deferred,
46 struct switchdev_deferred_item, list);
47 list_del(&dfitem->list);
49 spin_unlock_bh(&deferred_lock);
54 * switchdev_deferred_process - Process ops in deferred queue
56 * Called to flush the ops currently queued in deferred ops queue.
57 * rtnl_lock must be held.
/*
 * Drain loop: repeatedly dequeue an item, run its callback on the recorded
 * device with the stored data, then release the tracked device reference.
 */
59 void switchdev_deferred_process(void)
61 struct switchdev_deferred_item *dfitem;
65 while ((dfitem = switchdev_deferred_dequeue())) {
66 dfitem->func(dfitem->dev, dfitem->data);
67 dev_put_track(dfitem->dev, &dfitem->dev_tracker); /* pairs with dev_hold_track() in switchdev_deferred_enqueue() */
71 EXPORT_SYMBOL_GPL(switchdev_deferred_process);
/*
 * Work handler that drains the deferred queue by calling
 * switchdev_deferred_process().
 * NOTE(review): switchdev_deferred_process() is documented as requiring
 * rtnl_lock; confirm this handler acquires it around the call.
 */
73 static void switchdev_deferred_process_work(struct work_struct *work)
76 switchdev_deferred_process();
80 static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work); /* scheduled from switchdev_deferred_enqueue() via schedule_work() */
/*
 * Queue a deferred operation.
 *
 * Allocates an item large enough for the header plus @data_len bytes, copies
 * @data into dfitem->data (so the caller's buffer need not outlive the call),
 * takes a tracked reference on @dev, appends the item to the deferred list
 * under deferred_lock and schedules the processing work. Both the allocation
 * and the reference tracking use GFP_ATOMIC.
 *
 * NOTE(review): sizeof(*dfitem) + data_len is an unchecked addition. The
 * callers visible in this file pass sizeof(*attr) or switchdev_obj_size(),
 * which are small; struct_size() would be the overflow-safe form if that
 * ever changes.
 */
82 static int switchdev_deferred_enqueue(struct net_device *dev,
83 const void *data, size_t data_len,
84 switchdev_deferred_func_t *func)
86 struct switchdev_deferred_item *dfitem;
88 dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
93 memcpy(dfitem->data, data, data_len);
94 dev_hold_track(dev, &dfitem->dev_tracker, GFP_ATOMIC); /* released by dev_put_track() after the callback has run */
95 spin_lock_bh(&deferred_lock);
96 list_add_tail(&dfitem->list, &deferred); /* tail insertion keeps processing order equal to submission order */
97 spin_unlock_bh(&deferred_lock);
98 schedule_work(&deferred_process_work);
/*
 * Send an attribute notification of type @nt for @dev down the blocking
 * notifier chain and convert the chain's return code to an errno.
 *
 * attr_info.handled is inspected after the call (once under WARN_ON and once
 * in a plain test). Handlers such as __switchdev_handle_port_attr_set() set
 * it when their callback returns anything other than -EOPNOTSUPP.
 */
102 static int switchdev_port_attr_notify(enum switchdev_notifier_type nt,
103 struct net_device *dev,
104 const struct switchdev_attr *attr,
105 struct netlink_ext_ack *extack)
110 struct switchdev_notifier_port_attr_info attr_info = {
115 rc = call_switchdev_blocking_notifiers(nt, dev,
116 &attr_info.info, extack);
117 err = notifier_to_errno(rc); /* notifier return code -> errno */
119 WARN_ON(!attr_info.handled);
123 if (!attr_info.handled)
/*
 * Immediate (non-deferred) attribute set: issues a SWITCHDEV_PORT_ATTR_SET
 * notification through switchdev_port_attr_notify().
 */
129 static int switchdev_port_attr_set_now(struct net_device *dev,
130 const struct switchdev_attr *attr,
131 struct netlink_ext_ack *extack)
133 return switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
/*
 * Deferred-queue callback for attribute sets. @data is the queued copy of the
 * struct switchdev_attr made by switchdev_port_attr_set_defer().
 *
 * The set is applied with a NULL extack. Errors other than -EOPNOTSUPP are
 * logged with netdev_err(), and the attribute's complete() callback is passed
 * the device, the result and complete_priv.
 */
137 static void switchdev_port_attr_set_deferred(struct net_device *dev,
140 const struct switchdev_attr *attr = data;
143 err = switchdev_port_attr_set_now(dev, attr, NULL);
144 if (err && err != -EOPNOTSUPP)
145 netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
148 attr->complete(dev, err, attr->complete_priv);
/*
 * Queue a copy of @attr (sizeof(*attr) bytes) so that
 * switchdev_port_attr_set_deferred() applies it when the deferred queue is
 * processed. Returns the result of switchdev_deferred_enqueue().
 */
151 static int switchdev_port_attr_set_defer(struct net_device *dev,
152 const struct switchdev_attr *attr)
154 return switchdev_deferred_enqueue(dev, attr, sizeof(*attr),
155 switchdev_port_attr_set_deferred);
159 * switchdev_port_attr_set - Set port attribute
162 * @attr: attribute to set
163 * @extack: netlink extended ack, for error message propagation
165 * rtnl_lock must be held and must not be in atomic section,
166 * in case SWITCHDEV_F_DEFER flag is not set.
/*
 * Dispatch on SWITCHDEV_F_DEFER in attr->flags: deferred requests are queued
 * (note that @extack is not forwarded on that path), all others are applied
 * immediately with @extack.
 */
168 int switchdev_port_attr_set(struct net_device *dev,
169 const struct switchdev_attr *attr,
170 struct netlink_ext_ack *extack)
172 if (attr->flags & SWITCHDEV_F_DEFER)
173 return switchdev_port_attr_set_defer(dev, attr);
175 return switchdev_port_attr_set_now(dev, attr, extack);
177 EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
/*
 * Size of the concrete structure behind a generic switchdev_obj. The deferred
 * add/del paths use it to decide how many bytes of the object to copy into
 * the queue. PORT_MDB and HOST_MDB objects both use
 * struct switchdev_obj_port_mdb.
 */
179 static size_t switchdev_obj_size(const struct switchdev_obj *obj)
182 case SWITCHDEV_OBJ_ID_PORT_VLAN:
183 return sizeof(struct switchdev_obj_port_vlan);
184 case SWITCHDEV_OBJ_ID_PORT_MDB:
185 return sizeof(struct switchdev_obj_port_mdb);
186 case SWITCHDEV_OBJ_ID_HOST_MDB:
187 return sizeof(struct switchdev_obj_port_mdb);
/*
 * Send an object notification of type @nt for @dev down the blocking notifier
 * chain and convert the chain's return code to an errno.
 *
 * obj_info.handled is inspected after the call (once under WARN_ON and once
 * in a plain test). Handlers such as __switchdev_handle_port_obj_add() and
 * __switchdev_handle_port_obj_del() set it when their callback returns
 * anything other than -EOPNOTSUPP.
 */
194 static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
195 struct net_device *dev,
196 const struct switchdev_obj *obj,
197 struct netlink_ext_ack *extack)
202 struct switchdev_notifier_port_obj_info obj_info = {
207 rc = call_switchdev_blocking_notifiers(nt, dev, &obj_info.info, extack);
208 err = notifier_to_errno(rc); /* notifier return code -> errno */
210 WARN_ON(!obj_info.handled);
213 if (!obj_info.handled)
/*
 * Deferred-queue callback for object additions. @data is the queued copy of
 * the object made by switchdev_port_obj_add_defer().
 *
 * Issues SWITCHDEV_PORT_OBJ_ADD, logs errors other than -EOPNOTSUPP with
 * netdev_err(), and passes the device, the result and complete_priv to the
 * object's complete() callback.
 */
218 static void switchdev_port_obj_add_deferred(struct net_device *dev,
221 const struct switchdev_obj *obj = data;
225 err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
227 if (err && err != -EOPNOTSUPP)
228 netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
231 obj->complete(dev, err, obj->complete_priv);
/*
 * Queue a copy of @obj, sized by switchdev_obj_size(), so that
 * switchdev_port_obj_add_deferred() adds it when the deferred queue is
 * processed. Returns the result of switchdev_deferred_enqueue().
 */
234 static int switchdev_port_obj_add_defer(struct net_device *dev,
235 const struct switchdev_obj *obj)
237 return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
238 switchdev_port_obj_add_deferred);
242 * switchdev_port_obj_add - Add port object
245 * @obj: object to add
246 * @extack: netlink extended ack
248 * rtnl_lock must be held and must not be in atomic section,
249 * in case SWITCHDEV_F_DEFER flag is not set.
/*
 * Dispatch on SWITCHDEV_F_DEFER in obj->flags: deferred requests are queued
 * (without @extack), all others issue SWITCHDEV_PORT_OBJ_ADD immediately.
 */
251 int switchdev_port_obj_add(struct net_device *dev,
252 const struct switchdev_obj *obj,
253 struct netlink_ext_ack *extack)
255 if (obj->flags & SWITCHDEV_F_DEFER)
256 return switchdev_port_obj_add_defer(dev, obj);
258 return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
261 EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
/*
 * Immediate (non-deferred) object deletion: issues a SWITCHDEV_PORT_OBJ_DEL
 * notification through switchdev_port_obj_notify().
 */
263 static int switchdev_port_obj_del_now(struct net_device *dev,
264 const struct switchdev_obj *obj)
266 return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_DEL,
/*
 * Deferred-queue callback for object deletions. @data is the queued copy of
 * the object made by switchdev_port_obj_del_defer().
 *
 * Performs the delete, logs errors other than -EOPNOTSUPP with netdev_err(),
 * and passes the device, the result and complete_priv to the object's
 * complete() callback.
 */
270 static void switchdev_port_obj_del_deferred(struct net_device *dev,
273 const struct switchdev_obj *obj = data;
276 err = switchdev_port_obj_del_now(dev, obj);
277 if (err && err != -EOPNOTSUPP)
278 netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
281 obj->complete(dev, err, obj->complete_priv);
/*
 * Queue a copy of @obj, sized by switchdev_obj_size(), so that
 * switchdev_port_obj_del_deferred() deletes it when the deferred queue is
 * processed. Returns the result of switchdev_deferred_enqueue().
 */
284 static int switchdev_port_obj_del_defer(struct net_device *dev,
285 const struct switchdev_obj *obj)
287 return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
288 switchdev_port_obj_del_deferred);
292 * switchdev_port_obj_del - Delete port object
295 * @obj: object to delete
297 * rtnl_lock must be held and must not be in atomic section,
298 * in case SWITCHDEV_F_DEFER flag is not set.
/*
 * Dispatch on SWITCHDEV_F_DEFER in obj->flags: deferred requests are queued,
 * all others are deleted immediately via switchdev_port_obj_del_now().
 */
300 int switchdev_port_obj_del(struct net_device *dev,
301 const struct switchdev_obj *obj)
303 if (obj->flags & SWITCHDEV_F_DEFER)
304 return switchdev_port_obj_del_defer(dev, obj);
306 return switchdev_port_obj_del_now(dev, obj);
308 EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
/*
 * Two independent notifier chains: an atomic one driven by
 * call_switchdev_notifiers() and a blocking one driven by
 * call_switchdev_blocking_notifiers().
 */
310 static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
311 static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
314 * register_switchdev_notifier - Register notifier
315 * @nb: notifier_block
317 * Register switch device notifier.
/* Adds @nb to the atomic chain; returns atomic_notifier_chain_register()'s result. */
319 int register_switchdev_notifier(struct notifier_block *nb)
321 return atomic_notifier_chain_register(&switchdev_notif_chain, nb);
323 EXPORT_SYMBOL_GPL(register_switchdev_notifier);
326 * unregister_switchdev_notifier - Unregister notifier
327 * @nb: notifier_block
329 * Unregister switch device notifier.
/* Removes @nb from the atomic chain; returns atomic_notifier_chain_unregister()'s result. */
331 int unregister_switchdev_notifier(struct notifier_block *nb)
333 return atomic_notifier_chain_unregister(&switchdev_notif_chain, nb);
335 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
338 * call_switchdev_notifiers - Call notifiers
339 * @val: value passed unmodified to notifier function
341 * @info: notifier information data
342 * @extack: netlink extended ack
343 * Call all network notifier blocks.
/*
 * Records @extack in @info, then runs the atomic chain with @val and @info.
 * The return value is whatever atomic_notifier_call_chain() returns, i.e. a
 * notifier code rather than an errno.
 */
345 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
346 struct switchdev_notifier_info *info,
347 struct netlink_ext_ack *extack)
350 info->extack = extack;
351 return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
353 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
/* Adds @nb to the blocking chain; returns blocking_notifier_chain_register()'s result. */
355 int register_switchdev_blocking_notifier(struct notifier_block *nb)
357 struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
359 return blocking_notifier_chain_register(chain, nb);
361 EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier);
/* Removes @nb from the blocking chain; returns blocking_notifier_chain_unregister()'s result. */
363 int unregister_switchdev_blocking_notifier(struct notifier_block *nb)
365 struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
367 return blocking_notifier_chain_unregister(chain, nb);
369 EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier);
/*
 * Blocking-chain counterpart of call_switchdev_notifiers(): records @extack
 * in @info and runs the blocking chain. The result is a notifier code;
 * callers in this file pass it through notifier_to_errno().
 */
371 int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
372 struct switchdev_notifier_info *info,
373 struct netlink_ext_ack *extack)
376 info->extack = extack;
377 return blocking_notifier_call_chain(&switchdev_blocking_notif_chain,
380 EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
/*
 * Context handed to switchdev_lower_dev_walk() through
 * netdev_nested_priv.data while walking lower devices.
 */
382 struct switchdev_nested_priv {
383 bool (*check_cb)(const struct net_device *dev); /* predicate applied to each lower device */
384 bool (*foreign_dev_check_cb)(const struct net_device *dev,
385 const struct net_device *foreign_dev); /* called as (lower_dev, dev); a true result rejects the lower device */
386 const struct net_device *dev; /* passed as the second argument of foreign_dev_check_cb */
387 struct net_device *lower_dev; /* output: the matching lower device recorded by the walk */
/*
 * Per-device callback for netdev_walk_all_lower_dev_rcu(). A lower device
 * matches when check_cb() accepts it and foreign_dev_check_cb(lower_dev, dev)
 * returns false; the match is stored in the private context's lower_dev.
 */
390 static int switchdev_lower_dev_walk(struct net_device *lower_dev,
391 struct netdev_nested_priv *priv)
393 struct switchdev_nested_priv *switchdev_priv = priv->data;
394 bool (*foreign_dev_check_cb)(const struct net_device *dev,
395 const struct net_device *foreign_dev);
396 bool (*check_cb)(const struct net_device *dev);
397 const struct net_device *dev;
399 check_cb = switchdev_priv->check_cb;
400 foreign_dev_check_cb = switchdev_priv->foreign_dev_check_cb;
401 dev = switchdev_priv->dev;
403 if (check_cb(lower_dev) && !foreign_dev_check_cb(lower_dev, dev)) {
404 switchdev_priv->lower_dev = lower_dev;
/*
 * Walk all lower devices of @dev with switchdev_lower_dev_walk() and return
 * the lower device it recorded: one accepted by @check_cb and not reported as
 * foreign by @foreign_dev_check_cb. Callers in this file test the result
 * with "!" to detect that no such device exists.
 */
411 static struct net_device *
412 switchdev_lower_dev_find(struct net_device *dev,
413 bool (*check_cb)(const struct net_device *dev),
414 bool (*foreign_dev_check_cb)(const struct net_device *dev,
415 const struct net_device *foreign_dev))
417 struct switchdev_nested_priv switchdev_priv = {
418 .check_cb = check_cb,
419 .foreign_dev_check_cb = foreign_dev_check_cb,
423 struct netdev_nested_priv priv = {
424 .data = &switchdev_priv,
427 netdev_walk_all_lower_dev_rcu(dev, switchdev_lower_dev_walk, &priv);
429 return switchdev_priv.lower_dev;
/*
 * Route an FDB event to the driver callbacks, recursing through stacked
 * devices. @orig_dev is carried unchanged through the recursion and handed to
 * the callbacks together with info->ctx.
 *
 * Cases, in the order they are tested:
 *  - direct handling: @mod_cb is called for @dev and its result returned;
 *  - @dev is a LAG master: if none of its lower devices is ours (per
 *    switchdev_lower_dev_find()), fall through to the "maybe_bridged_with_us"
 *    check, otherwise @lag_mod_cb handles the event;
 *  - @dev is a bridge master with at least one lower device of ours: recurse
 *    into each lower device, never descending into another bridge and
 *    skipping ports that are neither ours nor stacked on ours; results other
 *    than 0 and -EOPNOTSUPP are treated as hard errors;
 *  - otherwise: if @dev's master upper device is a bridge that has a lower
 *    device of ours, restart the traversal from that bridge.
 *
 * err starts as -EOPNOTSUPP.
 */
432 static int __switchdev_handle_fdb_event_to_device(struct net_device *dev,
433 struct net_device *orig_dev, unsigned long event,
434 const struct switchdev_notifier_fdb_info *fdb_info,
435 bool (*check_cb)(const struct net_device *dev),
436 bool (*foreign_dev_check_cb)(const struct net_device *dev,
437 const struct net_device *foreign_dev),
438 int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev,
439 unsigned long event, const void *ctx,
440 const struct switchdev_notifier_fdb_info *fdb_info),
441 int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev,
442 unsigned long event, const void *ctx,
443 const struct switchdev_notifier_fdb_info *fdb_info))
445 const struct switchdev_notifier_info *info = &fdb_info->info;
446 struct net_device *br, *lower_dev;
447 struct list_head *iter;
448 int err = -EOPNOTSUPP;
451 return mod_cb(dev, orig_dev, event, info->ctx, fdb_info);
453 if (netif_is_lag_master(dev)) {
454 if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
455 goto maybe_bridged_with_us;
457 /* This is a LAG interface that we offload */
461 return lag_mod_cb(dev, orig_dev, event, info->ctx, fdb_info);
464 /* Recurse through lower interfaces in case the FDB entry is pointing
465 * towards a bridge device.
467 if (netif_is_bridge_master(dev)) {
468 if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
471 /* This is a bridge interface that we offload */
472 netdev_for_each_lower_dev(dev, lower_dev, iter) {
473 /* Do not propagate FDB entries across bridges */
474 if (netif_is_bridge_master(lower_dev))
477 /* Bridge ports might be either us, or LAG interfaces
480 if (!check_cb(lower_dev) &&
481 !switchdev_lower_dev_find(lower_dev, check_cb,
482 foreign_dev_check_cb))
485 err = __switchdev_handle_fdb_event_to_device(lower_dev, orig_dev,
486 event, fdb_info, check_cb,
487 foreign_dev_check_cb,
489 if (err && err != -EOPNOTSUPP)
496 maybe_bridged_with_us:
497 /* Event is neither on a bridge nor a LAG. Check whether it is on an
498 * interface that is in a bridge with us.
500 br = netdev_master_upper_dev_get_rcu(dev);
501 if (!br || !netif_is_bridge_master(br))
504 if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
507 return __switchdev_handle_fdb_event_to_device(br, orig_dev, event, fdb_info,
508 check_cb, foreign_dev_check_cb,
/*
 * Public entry point for FDB event routing. Starts the traversal with @dev
 * as both the current and the originating device, and special-cases an
 * -EOPNOTSUPP result before returning.
 * NOTE(review): -EOPNOTSUPP is presumably mapped to success so that "nobody
 * was interested" is not reported as an error — confirm.
 */
512 int switchdev_handle_fdb_event_to_device(struct net_device *dev, unsigned long event,
513 const struct switchdev_notifier_fdb_info *fdb_info,
514 bool (*check_cb)(const struct net_device *dev),
515 bool (*foreign_dev_check_cb)(const struct net_device *dev,
516 const struct net_device *foreign_dev),
517 int (*mod_cb)(struct net_device *dev, struct net_device *orig_dev,
518 unsigned long event, const void *ctx,
519 const struct switchdev_notifier_fdb_info *fdb_info),
520 int (*lag_mod_cb)(struct net_device *dev, struct net_device *orig_dev,
521 unsigned long event, const void *ctx,
522 const struct switchdev_notifier_fdb_info *fdb_info))
526 err = __switchdev_handle_fdb_event_to_device(dev, dev, event, fdb_info,
527 check_cb, foreign_dev_check_cb,
529 if (err == -EOPNOTSUPP)
534 EXPORT_SYMBOL_GPL(switchdev_handle_fdb_event_to_device);
/*
 * Recursive worker for switchdev_handle_port_obj_add().
 *
 * For a device handled directly, @add_cb is called with info->ctx, the object
 * and the extack taken from the notifier info; any result other than
 * -EOPNOTSUPP marks the notification as handled. Otherwise the lower devices
 * of @dev are visited recursively, with bridge masters given special
 * treatment, and results other than 0 and -EOPNOTSUPP are treated as hard
 * errors. err starts as -EOPNOTSUPP.
 * NOTE(review): the direct-handling case is presumably guarded by
 * check_cb(dev) — confirm.
 */
536 static int __switchdev_handle_port_obj_add(struct net_device *dev,
537 struct switchdev_notifier_port_obj_info *port_obj_info,
538 bool (*check_cb)(const struct net_device *dev),
539 int (*add_cb)(struct net_device *dev, const void *ctx,
540 const struct switchdev_obj *obj,
541 struct netlink_ext_ack *extack))
543 struct switchdev_notifier_info *info = &port_obj_info->info;
544 struct netlink_ext_ack *extack;
545 struct net_device *lower_dev;
546 struct list_head *iter;
547 int err = -EOPNOTSUPP;
549 extack = switchdev_notifier_info_to_extack(info);
552 err = add_cb(dev, info->ctx, port_obj_info->obj, extack);
553 if (err != -EOPNOTSUPP)
554 port_obj_info->handled = true;
558 /* Switch ports might be stacked under e.g. a LAG. Ignore the
559 * unsupported devices, another driver might be able to handle them. But
560 * propagate to the callers any hard errors.
562 * If the driver does its own bookkeeping of stacked ports, it's not
563 * necessary to go through this helper.
565 netdev_for_each_lower_dev(dev, lower_dev, iter) {
566 if (netif_is_bridge_master(lower_dev))
569 err = __switchdev_handle_port_obj_add(lower_dev, port_obj_info,
571 if (err && err != -EOPNOTSUPP)
/*
 * Driver-facing helper for SWITCHDEV_PORT_OBJ_ADD notifications: runs the
 * recursive worker from @dev and special-cases an -EOPNOTSUPP result before
 * returning.
 * NOTE(review): -EOPNOTSUPP is presumably mapped to success — confirm.
 */
578 int switchdev_handle_port_obj_add(struct net_device *dev,
579 struct switchdev_notifier_port_obj_info *port_obj_info,
580 bool (*check_cb)(const struct net_device *dev),
581 int (*add_cb)(struct net_device *dev, const void *ctx,
582 const struct switchdev_obj *obj,
583 struct netlink_ext_ack *extack))
587 err = __switchdev_handle_port_obj_add(dev, port_obj_info, check_cb,
589 if (err == -EOPNOTSUPP)
593 EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add);
/*
 * Recursive worker for switchdev_handle_port_obj_del().
 *
 * For a device handled directly, @del_cb is called with info->ctx and the
 * object; any result other than -EOPNOTSUPP marks the notification as
 * handled. Otherwise the lower devices of @dev are visited recursively, with
 * bridge masters given special treatment, and results other than 0 and
 * -EOPNOTSUPP are treated as hard errors. err starts as -EOPNOTSUPP.
 * Unlike the add path, no extack is passed to the callback.
 * NOTE(review): the direct-handling case is presumably guarded by
 * check_cb(dev) — confirm.
 */
595 static int __switchdev_handle_port_obj_del(struct net_device *dev,
596 struct switchdev_notifier_port_obj_info *port_obj_info,
597 bool (*check_cb)(const struct net_device *dev),
598 int (*del_cb)(struct net_device *dev, const void *ctx,
599 const struct switchdev_obj *obj))
601 struct switchdev_notifier_info *info = &port_obj_info->info;
602 struct net_device *lower_dev;
603 struct list_head *iter;
604 int err = -EOPNOTSUPP;
607 err = del_cb(dev, info->ctx, port_obj_info->obj);
608 if (err != -EOPNOTSUPP)
609 port_obj_info->handled = true;
613 /* Switch ports might be stacked under e.g. a LAG. Ignore the
614 * unsupported devices, another driver might be able to handle them. But
615 * propagate to the callers any hard errors.
617 * If the driver does its own bookkeeping of stacked ports, it's not
618 * necessary to go through this helper.
620 netdev_for_each_lower_dev(dev, lower_dev, iter) {
621 if (netif_is_bridge_master(lower_dev))
624 err = __switchdev_handle_port_obj_del(lower_dev, port_obj_info,
626 if (err && err != -EOPNOTSUPP)
/*
 * Driver-facing helper for SWITCHDEV_PORT_OBJ_DEL notifications: runs the
 * recursive worker from @dev and special-cases an -EOPNOTSUPP result before
 * returning.
 * NOTE(review): -EOPNOTSUPP is presumably mapped to success — confirm.
 */
633 int switchdev_handle_port_obj_del(struct net_device *dev,
634 struct switchdev_notifier_port_obj_info *port_obj_info,
635 bool (*check_cb)(const struct net_device *dev),
636 int (*del_cb)(struct net_device *dev, const void *ctx,
637 const struct switchdev_obj *obj))
641 err = __switchdev_handle_port_obj_del(dev, port_obj_info, check_cb,
643 if (err == -EOPNOTSUPP)
647 EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);
/*
 * Recursive worker for switchdev_handle_port_attr_set().
 *
 * For a device handled directly, @set_cb is called with info->ctx, the
 * attribute and the extack taken from the notifier info; any result other
 * than -EOPNOTSUPP marks the notification as handled. Otherwise the lower
 * devices of @dev are visited recursively, with bridge masters given special
 * treatment, and results other than 0 and -EOPNOTSUPP are treated as hard
 * errors. err starts as -EOPNOTSUPP.
 * NOTE(review): the direct-handling case is presumably guarded by
 * check_cb(dev) — confirm.
 */
649 static int __switchdev_handle_port_attr_set(struct net_device *dev,
650 struct switchdev_notifier_port_attr_info *port_attr_info,
651 bool (*check_cb)(const struct net_device *dev),
652 int (*set_cb)(struct net_device *dev, const void *ctx,
653 const struct switchdev_attr *attr,
654 struct netlink_ext_ack *extack))
656 struct switchdev_notifier_info *info = &port_attr_info->info;
657 struct netlink_ext_ack *extack;
658 struct net_device *lower_dev;
659 struct list_head *iter;
660 int err = -EOPNOTSUPP;
662 extack = switchdev_notifier_info_to_extack(info);
665 err = set_cb(dev, info->ctx, port_attr_info->attr, extack);
666 if (err != -EOPNOTSUPP)
667 port_attr_info->handled = true;
671 /* Switch ports might be stacked under e.g. a LAG. Ignore the
672 * unsupported devices, another driver might be able to handle them. But
673 * propagate to the callers any hard errors.
675 * If the driver does its own bookkeeping of stacked ports, it's not
676 * necessary to go through this helper.
678 netdev_for_each_lower_dev(dev, lower_dev, iter) {
679 if (netif_is_bridge_master(lower_dev))
682 err = __switchdev_handle_port_attr_set(lower_dev, port_attr_info,
684 if (err && err != -EOPNOTSUPP)
/*
 * Driver-facing helper for SWITCHDEV_PORT_ATTR_SET notifications: runs the
 * recursive worker from @dev and special-cases an -EOPNOTSUPP result before
 * returning.
 * NOTE(review): -EOPNOTSUPP is presumably mapped to success — confirm.
 */
691 int switchdev_handle_port_attr_set(struct net_device *dev,
692 struct switchdev_notifier_port_attr_info *port_attr_info,
693 bool (*check_cb)(const struct net_device *dev),
694 int (*set_cb)(struct net_device *dev, const void *ctx,
695 const struct switchdev_attr *attr,
696 struct netlink_ext_ack *extack))
700 err = __switchdev_handle_port_attr_set(dev, port_attr_info, check_cb,
702 if (err == -EOPNOTSUPP)
706 EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);
/*
 * Announce that @brport_dev is being offloaded by sending
 * SWITCHDEV_BRPORT_OFFLOADED on the blocking chain. The notification carries
 * the caller's atomic and blocking notifier blocks and the tx_fwd_offload
 * setting. Returns the chain result converted with notifier_to_errno().
 */
708 int switchdev_bridge_port_offload(struct net_device *brport_dev,
709 struct net_device *dev, const void *ctx,
710 struct notifier_block *atomic_nb,
711 struct notifier_block *blocking_nb,
713 struct netlink_ext_ack *extack)
715 struct switchdev_notifier_brport_info brport_info = {
719 .atomic_nb = atomic_nb,
720 .blocking_nb = blocking_nb,
721 .tx_fwd_offload = tx_fwd_offload,
728 err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_OFFLOADED,
729 brport_dev, &brport_info.info,
731 return notifier_to_errno(err);
733 EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload);
/*
 * Counterpart of switchdev_bridge_port_offload(): sends
 * SWITCHDEV_BRPORT_UNOFFLOADED on the blocking chain for @brport_dev with the
 * caller's notifier blocks. The function is void, so the chain's result is
 * not reported to the caller.
 */
735 void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
737 struct notifier_block *atomic_nb,
738 struct notifier_block *blocking_nb)
740 struct switchdev_notifier_brport_info brport_info = {
743 .atomic_nb = atomic_nb,
744 .blocking_nb = blocking_nb,
750 call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_UNOFFLOADED,
751 brport_dev, &brport_info.info,
754 EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);