// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag.h"
#include "lag_mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

/* Multipath LAG requires both PFs to be present and their eswitches to meet
 * the multipath prerequisites.
 */
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
					 ldev->pf[MLX5_LAG_P2].dev);
}

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	ldev = mlx5_lag_dev_get(dev);
	res = ldev && __mlx5_lag_is_multipath(ldev);

	return res;
}
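
/* Usage sketch (illustration only, not part of this file): other driver code
 * can gate multipath-specific offload handling on this query, e.g.
 *
 *	if (mlx5_lag_is_multipath(mdev))
 *		handle_multipath_offload(mdev);
 *
 * handle_multipath_offload() and mdev are hypothetical names used only to
 * show the call pattern.
 */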

/**
 * Set lag port affinity
 *
 * @ldev: lag device
 * @port:
 *     0 - set normal affinity.
 *     1 - set affinity to port 1.
 *     2 - set affinity to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	struct lag_tracker tracker;

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Invalid affinity port %d", port);
		return;
	}

	/* Notify listeners on the PF(s) that will carry traffic before the
	 * hardware LAG state is modified.
	 */
	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);

	flush_workqueue(ldev->wq);
}

struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	/* Only one member is meaningful per event: route events use fen_info,
	 * nexthop events use fnh_info.
	 */
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
				     unsigned long event,
				     struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;
	struct fib_nh *fib_nh0, *fib_nh1;
	unsigned int nhs;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* Stop tracking the route if it is the one we follow */
		if (mp->mfi == fi)
			mp->mfi = NULL;
		return;
	}

	/* Handle add/replace event */
	nhs = fib_info_num_path(fi);
	if (nhs == 1) {
		if (__mlx5_lag_is_active(ldev)) {
			struct fib_nh *nh = fib_info_nh(fi, 0);
			struct net_device *nh_dev = nh->fib_nh_dev;
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

			mlx5_lag_set_port_affinity(ldev, ++i);
		}
		return;
	}

	if (nhs != 2)
		return;

	/* Verify next hops are ports of the same hca */
	fib_nh0 = fib_info_nh(fi, 0);
	fib_nh1 = fib_info_nh(fi, 1);
	if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
	    !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload requires two ports of the same HCA\n");
		return;
	}

	/* First time we see multipath route */
	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mp->mfi = fi;
}

static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check the nh event is related to the route */
	if (!mp->mfi || mp->mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 fib_work->fen_info.fi);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev,
					   fib_work->event,
					   fib_work->fnh_info.fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}
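
/* FIB notifier callback. It may run in an atomic context (hence the
 * GFP_ATOMIC allocation above), so it only validates the event, snapshots
 * the notifier info into a work item while holding a reference on the
 * fib_info, and defers the real processing to mlx5_lag_fib_update() on
 * ldev->wq, where the RTNL lock is taken.
 */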
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct net_device *fib_dev;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh) {
			NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
			return notifier_from_errno(-EINVAL);
		}
		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
		if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
		    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
			return NOTIFY_DONE;
		}
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(ldev->wq, &fib_work->work);

	return NOTIFY_DONE;
}

int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err)
		mp->fib_nb.notifier_call = NULL;

	return err;
}
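
/* Pairing sketch (illustration only; the actual call sites are outside this
 * file, presumably in the LAG core): setup and teardown are expected to call
 * these as a pair, roughly
 *
 *	err = mlx5_lag_mp_init(ldev);
 *	if (err)
 *		return err;
 *	...
 *	mlx5_lag_mp_cleanup(ldev);
 *
 * The surrounding error handling is hypothetical.
 */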

void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	mp->fib_nb.notifier_call = NULL;
}