1 // SPDX-License-Identifier: GPL-2.0
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
5 * Generic netlink support functions to configure an SMC-R PNET table
7 * Copyright IBM Corp. 2016
12 #include <linux/module.h>
13 #include <linux/list.h>
14 #include <linux/ctype.h>
15 #include <net/netlink.h>
16 #include <net/genetlink.h>
18 #include <uapi/linux/if.h>
19 #include <uapi/linux/smc.h>
21 #include <rdma/ib_verbs.h>
26 #define SMC_MAX_PNET_ID_LEN 16 /* Max. length of PNET id */
// Netlink attribute policy table, indexed by SMC_PNETID_* attribute id and
// sized SMC_PNETID_MAX + 1. The string attributes are declared as
// NLA_NUL_STRING and the IB port as NLA_U8.
28 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
30 .type = NLA_NUL_STRING,
// NOTE(review): presumably the PNET id attribute. The cap here is
// SMC_MAX_PNET_ID_LEN - 1 while the pnet_name buffer in struct smc_pnetentry
// is SMC_MAX_PNET_ID_LEN + 1 bytes; confirm whether ids of the full
// SMC_MAX_PNET_ID_LEN length are meant to pass the policy check.
31 .len = SMC_MAX_PNET_ID_LEN - 1
33 [SMC_PNETID_ETHNAME] = {
34 .type = NLA_NUL_STRING,
37 [SMC_PNETID_IBNAME] = {
38 .type = NLA_NUL_STRING,
// One less than IB_DEVICE_NAME_MAX, presumably to leave room for the
// terminating NUL.
39 .len = IB_DEVICE_NAME_MAX - 1
// The IB port number is carried as a single unsigned byte.
41 [SMC_PNETID_IBPORT] = { .type = NLA_U8 }
// Forward declaration so that the handlers below can pass the family to
// genlmsg_put() before the family is defined.
44 static struct genl_family smc_pnet_nl_family;
47 * struct smc_pnettable - SMC PNET table anchor
48 * @lock: Lock for list action
49 * @pnetlist: List of PNETIDs
// Single global anchor of the PNET table: the list of entries plus the
// rwlock that the functions in this file take around every walk or change
// of that list.
51 static struct smc_pnettable {
53 struct list_head pnetlist;
// Static initialisers: an empty list and an unlocked rwlock.
55 .pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist),
56 .lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock)
60 * struct smc_pnetentry - pnet identifier name entry
62 * @pnet_name: Pnet identifier name
63 * @ndev: pointer to network device.
64 * @smcibdev: Pointer to IB device.
// One PNET table entry; linked into smc_pnettable.pnetlist through list.
66 struct smc_pnetentry {
67 struct list_head list;
// Room for SMC_MAX_PNET_ID_LEN characters plus one more byte.
68 char pnet_name[SMC_MAX_PNET_ID_LEN + 1];
// Set from dev_get_by_name() in smc_pnet_fill_entry(); the removal paths in
// this file call dev_put() on it when an entry is unlinked.
69 struct net_device *ndev;
// Set from smc_pnet_find_ib() in smc_pnet_fill_entry().
70 struct smc_ib_device *smcibdev;
74 /* Check if two RDMA device entries are identical. Use device name and port
75 * number for comparison.
// Returns true when the entry refers to the given IB port on the IB device
// with the given name.
77 static bool smc_pnet_same_ibname(struct smc_pnetentry *pnetelem, char *ibname,
// The port numbers are compared first; the name comparison is bounded by the
// size of the device name field.
80 return pnetelem->ib_port == ibport &&
81 !strncmp(pnetelem->smcibdev->ibdev->name, ibname,
82 sizeof(pnetelem->smcibdev->ibdev->name));
85 /* Find a pnetid in the pnet table.
// Looks up the table entry whose pnet_name equals the given name.
// The list is walked under the table read lock. Returns the matching entry,
// or NULL when no entry matches.
87 static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name)
// found_pnetelem stays NULL unless the loop below records a match.
89 struct smc_pnetentry *pnetelem, *found_pnetelem = NULL;
91 read_lock(&smc_pnettable.lock);
92 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
// The comparison is bounded by the size of the stored name buffer.
93 if (!strncmp(pnetelem->pnet_name, pnet_name,
94 sizeof(pnetelem->pnet_name))) {
95 found_pnetelem = pnetelem;
// NOTE(review): the entry pointer is handed back after the lock is dropped;
// nothing visible here keeps the entry alive for the caller - confirm that a
// concurrent remove or flush cannot free it while the caller still uses it.
99 read_unlock(&smc_pnettable.lock);
100 return found_pnetelem;
103 /* Remove a pnetid from the pnet table.
// Unlinks the entry with the given pnet name from the table, under the table
// write lock, and drops its netdev reference. The return value is set on
// lines that are not visible here.
105 static int smc_pnet_remove_by_pnetid(char *pnet_name)
// tmp_pe is the lookahead cursor required by the _safe list iterator.
107 struct smc_pnetentry *pnetelem, *tmp_pe;
110 write_lock(&smc_pnettable.lock);
// The _safe iterator is used because the loop body unlinks the current entry.
111 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
113 if (!strncmp(pnetelem->pnet_name, pnet_name,
114 sizeof(pnetelem->pnet_name))) {
115 list_del(&pnetelem->list);
// Drop the netdev reference held by the entry.
116 dev_put(pnetelem->ndev);
122 write_unlock(&smc_pnettable.lock);
126 /* Remove a pnet entry mentioning a given network device from the pnet table.
// Unlinks table entries that reference the given network device, under the
// table write lock, and drops their netdev reference. Called from the netdev
// notifier on NETDEV_UNREGISTER. The return value is set on lines that are
// not visible here.
128 static int smc_pnet_remove_by_ndev(struct net_device *ndev)
130 struct smc_pnetentry *pnetelem, *tmp_pe;
133 write_lock(&smc_pnettable.lock);
// The _safe iterator is used because the loop body unlinks the current entry.
134 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
// Entries are matched by pointer identity of the net_device.
136 if (pnetelem->ndev == ndev) {
137 list_del(&pnetelem->list);
138 dev_put(pnetelem->ndev);
144 write_unlock(&smc_pnettable.lock);
148 /* Remove a pnet entry mentioning a given ib device from the pnet table.
// Unlinks table entries that reference the given SMC IB device, under the
// table write lock, and drops their netdev reference. Not static, so it is
// callable from outside this file. The return value is set on lines that are
// not visible here.
150 int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev)
152 struct smc_pnetentry *pnetelem, *tmp_pe;
155 write_lock(&smc_pnettable.lock);
// The _safe iterator is used because the loop body unlinks the current entry.
156 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
// Entries are matched by pointer identity of the smc_ib_device.
158 if (pnetelem->smcibdev == ibdev) {
159 list_del(&pnetelem->list);
160 dev_put(pnetelem->ndev);
166 write_unlock(&smc_pnettable.lock);
170 /* Append a pnetid to the end of the pnet table if not already on this list.
// Appends the new entry to the tail of the table unless an existing entry
// conflicts with it. The whole check-and-insert runs under the table write
// lock. The return value is set on lines that are not visible here.
172 static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
174 struct smc_pnetentry *pnetelem;
177 write_lock(&smc_pnettable.lock);
178 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
// An existing entry conflicts when any one of these matches: the pnet name,
// the ethernet device name, or the IB device name together with the IB port.
179 if (!strncmp(pnetelem->pnet_name, new_pnetelem->pnet_name,
180 sizeof(new_pnetelem->pnet_name)) ||
181 !strncmp(pnetelem->ndev->name, new_pnetelem->ndev->name,
182 sizeof(new_pnetelem->ndev->name)) ||
183 smc_pnet_same_ibname(pnetelem,
184 new_pnetelem->smcibdev->ibdev->name,
185 new_pnetelem->ib_port)) {
// NOTE(review): this drops the netdev reference of the EXISTING entry, which
// stays on the list, not that of new_pnetelem. The two are the same device
// only when the conflict was on the ethernet name - confirm the refcounting
// for conflicts on the pnet name or on the IB device and port.
186 dev_put(pnetelem->ndev);
// No conflict was found, so the new entry goes to the end of the list.
190 list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
193 write_unlock(&smc_pnettable.lock);
197 /* The limit for pnetid is 16 characters.
198 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
199 * Lower case letters are converted to upper case.
200 * Interior blanks should not be used.
// Validates the user-supplied pnet name and writes a normalised copy to
// pnetid. Leading blanks are skipped, trailing blanks are ignored, and lower
// case letters are stored as upper case. The character checks and the return
// statements are on lines that are not visible here.
202 static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
// bf points at the first non-blank character of the input.
204 char *bf = skip_spaces(pnet_name);
205 size_t len = strlen(bf);
// end starts one past the last character of the string.
206 char *end = bf + len;
// Walk backwards over trailing white space; afterwards end points at the last
// non-blank character, or just before bf if there is none (assuming the loop
// body, which is not visible here, is empty).
210 while (--end >= bf && isspace(*end))
// end - bf is the trimmed length minus one, so this rejects names longer than
// SMC_MAX_PNET_ID_LEN characters.
212 if (end - bf >= SMC_MAX_PNET_ID_LEN)
// Copy one character, converting lower case to upper case.
217 *pnetid++ = islower(*bf) ? toupper(*bf) : *bf;
224 /* Find an infiniband device by a given name. The device might not exist. */
// Searches the smc_ib_devices list for a device with the given name, holding
// the smc_ib_devices spinlock during the walk. The return statements are on
// lines that are not visible here.
225 static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
227 struct smc_ib_device *ibdev;
229 spin_lock(&smc_ib_devices.lock);
230 list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
// The comparison is bounded by the size of the device name field.
231 if (!strncmp(ibdev->ibdev->name, ib_name,
232 sizeof(ibdev->ibdev->name))) {
238 spin_unlock(&smc_ib_devices.lock);
242 /* Parse the supplied netlink attributes and fill a pnetentry structure.
243 * For ethernet and infiniband device names verify that the devices exist.
// Fills the entry from the netlink attributes in tb. All four attributes
// (pnet name, ethernet name, IB name, IB port) must be present. The error
// codes and jump targets are on lines that are not visible here.
245 static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
248 char *string, *ibname;
// Start from a fully zeroed entry with a self-linked list node.
251 memset(pnetelem, 0, sizeof(*pnetelem));
252 INIT_LIST_HEAD(&pnetelem->list);
// PNET id: required; validated and normalised straight into the entry.
255 if (!tb[SMC_PNETID_NAME])
257 string = (char *)nla_data(tb[SMC_PNETID_NAME]);
258 if (!smc_pnetid_valid(string, pnetelem->pnet_name))
// Ethernet device: required; looked up by name within the given namespace.
// The entry keeps the device pointer returned by dev_get_by_name().
262 if (!tb[SMC_PNETID_ETHNAME])
265 string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
266 pnetelem->ndev = dev_get_by_name(net, string);
// IB device: required; the name is passed through strim() before the lookup
// and the lookup must succeed.
271 if (!tb[SMC_PNETID_IBNAME])
274 ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
275 ibname = strim(ibname);
276 pnetelem->smcibdev = smc_pnet_find_ib(ibname);
277 if (!pnetelem->smcibdev)
// IB port: required; must lie in the range 1..SMC_MAX_PORTS.
281 if (!tb[SMC_PNETID_IBPORT])
283 pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
284 if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS)
// Presumably the error path: give back the netdev obtained above. The label
// and the conditions that lead here are not visible - confirm.
291 dev_put(pnetelem->ndev);
295 /* Convert an smc_pnetentry to a netlink attribute sequence */
// Adds the four attributes of the entry (pnet name, ethernet device name, IB
// device name, IB port) to the message. The return statements are on lines
// that are not visible here.
296 static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem)
// The || chain stops at the first put that reports a failure.
298 if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name) ||
299 nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->ndev->name) ||
300 nla_put_string(msg, SMC_PNETID_IBNAME,
301 pnetelem->smcibdev->ibdev->name) ||
302 nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
307 /* Retrieve one PNETID entry */
// doit handler for SMC_PNETID_GET: looks up the entry named by the
// SMC_PNETID_NAME attribute and replies with a message carrying its
// attributes. Error handling is on lines that are not visible here.
308 static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
310 struct smc_pnetentry *pnetelem;
// The name attribute is required.
315 if (!info->attrs[SMC_PNETID_NAME])
// NOTE(review): the lookup returns the entry after releasing the table lock,
// and the entry is read further down without that lock - confirm that it
// cannot be removed in between.
317 pnetelem = smc_pnet_find_pnetid(
318 (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
// Allocate the reply message.
321 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
// The reply header echoes the port id and sequence number of the request.
325 hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
326 &smc_pnet_nl_family, 0, SMC_PNETID_GET);
332 if (smc_pnet_set_nla(msg, pnetelem)) {
// Finalise the message and send it back to the requester.
337 genlmsg_end(msg, hdr);
338 return genlmsg_reply(msg, info);
// doit handler, presumably for SMC_PNETID_ADD: allocates an entry, fills it
// from the request attributes and enters it into the table. The checks
// between the visible steps are on lines that are not visible here.
345 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
// Device names are resolved in the network namespace of the request.
347 struct net *net = genl_info_net(info);
348 struct smc_pnetentry *pnetelem;
351 pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
354 rc = smc_pnet_fill_entry(net, pnetelem, info->attrs);
356 rc = smc_pnet_enter(pnetelem);
// Runs after the entry has been entered into the table.
361 rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port);
// Presumably executed only when the previous step failed, taking the freshly
// added entry out of the table again - confirm against the full source.
363 smc_pnet_remove_by_pnetid(pnetelem->pnet_name);
// doit handler, presumably for SMC_PNETID_DEL: removes the entry named by the
// SMC_PNETID_NAME attribute and returns the result of the removal.
367 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
// The name attribute is required.
369 if (!info->attrs[SMC_PNETID_NAME])
371 return smc_pnet_remove_by_pnetid(
372 (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
// Installed as the .start callback of the SMC_PNETID_GET operation. The body
// is not visible here.
375 static int smc_pnet_dump_start(struct netlink_callback *cb)
// Writes one entry into the dump buffer as a complete SMC_PNETID_GET message
// with the given port id, sequence number and flags. The return statements
// are on lines that are not visible here.
381 static int smc_pnet_dumpinfo(struct sk_buff *skb,
382 u32 portid, u32 seq, u32 flags,
383 struct smc_pnetentry *pnetelem)
387 hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family,
388 flags, SMC_PNETID_GET);
// If the attributes cannot be added, the partially built message is
// cancelled.
391 if (smc_pnet_set_nla(skb, pnetelem) < 0) {
392 genlmsg_cancel(skb, hdr);
395 genlmsg_end(skb, hdr);
// dumpit handler for SMC_PNETID_GET: walks the table under the read lock and
// emits one NLM_F_MULTI message per entry. The bookkeeping and the return
// value are on lines that are not visible here.
399 static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
401 struct smc_pnetentry *pnetelem;
404 read_lock(&smc_pnettable.lock);
405 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
// Entries with an index below cb->args[0] are skipped; presumably that slot
// holds the number of entries already sent by earlier calls - confirm.
406 if (idx++ < cb->args[0])
// The reply is addressed to the port id and sequence number of the request.
408 if (smc_pnet_dumpinfo(skb, NETLINK_CB(cb->skb).portid,
409 cb->nlh->nlmsg_seq, NLM_F_MULTI,
416 read_unlock(&smc_pnettable.lock);
420 /* Remove and delete all pnetids from pnet table.
// doit handler for SMC_PNETID_FLUSH: unlinks every entry from the table under
// the write lock and drops each netdev reference. smc_pnet_exit() also calls
// this with both arguments NULL, and the visible lines use neither argument.
422 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
424 struct smc_pnetentry *pnetelem, *tmp_pe;
426 write_lock(&smc_pnettable.lock);
// The _safe iterator is used because every visited entry is unlinked.
427 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
429 list_del(&pnetelem->list);
430 dev_put(pnetelem->ndev);
433 write_unlock(&smc_pnettable.lock);
437 /* SMC_PNETID generic netlink operation definition */
// Operation table of the family. Every visible operation carries
// GENL_ADMIN_PERM and uses smc_pnet_policy for attribute validation.
438 static const struct genl_ops smc_pnet_ops[] = {
// GET supports both a single lookup (doit) and a table dump (dumpit, start).
440 .cmd = SMC_PNETID_GET,
441 .flags = GENL_ADMIN_PERM,
442 .policy = smc_pnet_policy,
443 .doit = smc_pnet_get,
444 .dumpit = smc_pnet_dump,
445 .start = smc_pnet_dump_start
// ADD: the handler assignment is on a line that is not visible here.
448 .cmd = SMC_PNETID_ADD,
449 .flags = GENL_ADMIN_PERM,
450 .policy = smc_pnet_policy,
// DEL: the handler assignment is on a line that is not visible here.
454 .cmd = SMC_PNETID_DEL,
455 .flags = GENL_ADMIN_PERM,
456 .policy = smc_pnet_policy,
// FLUSH empties the whole table.
460 .cmd = SMC_PNETID_FLUSH,
461 .flags = GENL_ADMIN_PERM,
462 .policy = smc_pnet_policy,
463 .doit = smc_pnet_flush
467 /* SMC_PNETID family definition */
// Generic netlink family description; registered in smc_pnet_init() and
// unregistered in smc_pnet_exit().
468 static struct genl_family smc_pnet_nl_family = {
470 .name = SMCR_GENL_FAMILY_NAME,
471 .version = SMCR_GENL_FAMILY_VERSION,
// Highest attribute id; matches the size of smc_pnet_policy minus one.
472 .maxattr = SMC_PNETID_MAX,
474 .module = THIS_MODULE,
// Number of operations; the ops pointer itself is on a line that is not
// visible here.
476 .n_ops = ARRAY_SIZE(smc_pnet_ops)
// Netdevice notifier callback. On NETDEV_UNREGISTER the entries that
// reference the affected device are removed from the table. The handling of
// other events and the return value are on lines that are not visible here.
479 static int smc_pnet_netdev_event(struct notifier_block *this,
480 unsigned long event, void *ptr)
// The device the event refers to.
482 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
486 case NETDEV_UNREGISTER:
487 smc_pnet_remove_by_ndev(event_dev);
// Notifier block that routes netdevice events to smc_pnet_netdev_event().
494 static struct notifier_block smc_netdev_notifier = {
495 .notifier_call = smc_pnet_netdev_event
// Init hook: registers the generic netlink family first and then the
// netdevice notifier. The checks and the return value are on lines that are
// not visible here.
498 int __init smc_pnet_init(void)
502 rc = genl_register_family(&smc_pnet_nl_family);
505 rc = register_netdevice_notifier(&smc_netdev_notifier);
// Presumably reached only when the notifier registration failed, undoing the
// family registration - confirm against the full source.
507 genl_unregister_family(&smc_pnet_nl_family);
// Exit hook: empties the table, then unregisters the netdevice notifier and
// the generic netlink family.
511 void smc_pnet_exit(void)
// NOTE(review): the table is flushed while the netlink family is still
// registered - confirm that no entry can be added between this flush and the
// unregister call below.
513 smc_pnet_flush(NULL, NULL);
514 unregister_netdevice_notifier(&smc_netdev_notifier);
515 genl_unregister_family(&smc_pnet_nl_family);
518 /* PNET table analysis for a given sock:
519 * determine the ib_device and port belonging to the ethernet interface
520 * used by the internal TCP socket.
// Looks for a table entry whose ethernet device is the device of the socket
// route and whose IB port is active, and reports that IB device and port
// through the two output pointers. Initialisation of the outputs, checks on
// dst and the release of dst are on lines that are not visible here.
522 void smc_pnet_find_roce_resource(struct sock *sk,
523 struct smc_ib_device **smcibdev, u8 *ibport)
// Route entry of the socket; dst->dev is compared against each entry below.
525 struct dst_entry *dst = sk_dst_get(sk);
526 struct smc_pnetentry *pnetelem;
535 read_lock(&smc_pnettable.lock);
536 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
// NOTE(review): dst is dereferenced here; a NULL check is presumably on a
// line that is not visible - confirm.
537 if (dst->dev == pnetelem->ndev) {
// Only an entry with an active IB port is reported.
538 if (smc_ib_port_active(pnetelem->smcibdev,
539 pnetelem->ib_port)) {
540 *smcibdev = pnetelem->smcibdev;
541 *ibport = pnetelem->ib_port;
546 read_unlock(&smc_pnettable.lock);