// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. */

#include <linux/device.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <cxlmem.h>
#include "trace.h"
#include "core.h"

static DECLARE_RWSEM(cxl_memdev_rwsem);

/*
 * An entire PCI topology full of devices should be enough for any
 * config
 */
#define CXL_MEM_MAX_DEVS 65536

static int cxl_mem_major;
static DEFINE_IDA(cxl_memdev_ida);

static void cxl_memdev_release(struct device *dev)
{
        struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

        ida_free(&cxl_memdev_ida, cxlmd->id);
        kfree(cxlmd);
}

static char *cxl_memdev_devnode(const struct device *dev, umode_t *mode, kuid_t *uid,
                                kgid_t *gid)
{
        return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
}

static ssize_t firmware_version_show(struct device *dev,
                                     struct device_attribute *attr, char *buf)
{
        struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
        struct cxl_dev_state *cxlds = cxlmd->cxlds;

        return sysfs_emit(buf, "%.16s\n", cxlds->firmware_version);
}
static DEVICE_ATTR_RO(firmware_version);

static ssize_t payload_max_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
{
        struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
        struct cxl_dev_state *cxlds = cxlmd->cxlds;

        return sysfs_emit(buf, "%zu\n", cxlds->payload_size);
}
static DEVICE_ATTR_RO(payload_max);

static ssize_t label_storage_size_show(struct device *dev,
                                       struct device_attribute *attr, char *buf)
{
        struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
        struct cxl_dev_state *cxlds = cxlmd->cxlds;

        return sysfs_emit(buf, "%zu\n", cxlds->lsa_size);
}
static DEVICE_ATTR_RO(label_storage_size);

static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
                             char *buf)
{
        struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        unsigned long long len = resource_size(&cxlds->ram_res);

        return sysfs_emit(buf, "%#llx\n", len);
}

static struct device_attribute dev_attr_ram_size =
        __ATTR(size, 0444, ram_size_show, NULL);

static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
                              char *buf)
{
        struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        unsigned long long len = resource_size(&cxlds->pmem_res);

        return sysfs_emit(buf, "%#llx\n", len);
}

static struct device_attribute dev_attr_pmem_size =
        __ATTR(size, 0444, pmem_size_show, NULL);

static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
                           char *buf)
{
        struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
        struct cxl_dev_state *cxlds = cxlmd->cxlds;

        return sysfs_emit(buf, "%#llx\n", cxlds->serial);
}
static DEVICE_ATTR_RO(serial);

static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
                              char *buf)
{
        return sprintf(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);

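/*
 * Read the device's full poison list, issuing one Get Poison List
 * request per partition: pmem first, then volatile ram.
 */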
static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
{
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        u64 offset, length;
        int rc = 0;

        /* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */
        if (resource_size(&cxlds->pmem_res)) {
                offset = cxlds->pmem_res.start;
                length = resource_size(&cxlds->pmem_res);
                rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
                if (rc)
                        return rc;
        }
        if (resource_size(&cxlds->ram_res)) {
                offset = cxlds->ram_res.start;
                length = resource_size(&cxlds->ram_res);
                rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
                /*
                 * Invalid Physical Address is not an error for
                 * volatile addresses. Device support is optional.
                 */
                if (rc == -EFAULT)
                        rc = 0;
        }
        return rc;
}

int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
{
        struct cxl_port *port;
        int rc;

        port = dev_get_drvdata(&cxlmd->dev);
        if (!port || !is_cxl_endpoint(port))
                return -EINVAL;

        rc = down_read_interruptible(&cxl_dpa_rwsem);
        if (rc)
                return rc;

        if (port->commit_end == -1) {
                /* No regions mapped to this memdev */
                rc = cxl_get_poison_by_memdev(cxlmd);
        } else {
                /* Regions mapped, collect poison by endpoint */
                rc = cxl_get_poison_by_endpoint(port);
        }
        up_read(&cxl_dpa_rwsem);

        return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);

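/*
 * Context for walking a port's child endpoint decoders to find the
 * committed region, if any, that maps a given device physical address.
 */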
struct cxl_dpa_to_region_context {
        struct cxl_region *cxlr;
        u64 dpa;
};

static int __cxl_dpa_to_region(struct device *dev, void *arg)
{
        struct cxl_dpa_to_region_context *ctx = arg;
        struct cxl_endpoint_decoder *cxled;
        u64 dpa = ctx->dpa;

        if (!is_endpoint_decoder(dev))
                return 0;

        cxled = to_cxl_endpoint_decoder(dev);
        if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
                return 0;

        if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
                return 0;

        dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
                dev_name(&cxled->cxld.region->dev));

        ctx->cxlr = cxled->cxld.region;

        return 1;
}

static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
{
        struct cxl_dpa_to_region_context ctx;
        struct cxl_port *port;

        ctx = (struct cxl_dpa_to_region_context) {
                .dpa = dpa,
        };
        port = dev_get_drvdata(&cxlmd->dev);
        if (port && is_cxl_endpoint(port) && port->commit_end != -1)
                device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);

        return ctx.cxlr;
}

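/*
 * Poison inject/clear are debugfs-only interfaces: require the DPA to
 * fall within the device's dpa resource and to be 64-byte aligned.
 */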
static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
{
        struct cxl_dev_state *cxlds = cxlmd->cxlds;

        if (!IS_ENABLED(CONFIG_DEBUG_FS))
                return 0;

        if (!resource_size(&cxlds->dpa_res)) {
                dev_dbg(cxlds->dev, "device has no dpa resource\n");
                return -EINVAL;
        }
        if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) {
                dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
                        dpa, &cxlds->dpa_res);
                return -EINVAL;
        }
        if (!IS_ALIGNED(dpa, 64)) {
                dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa);
                return -EINVAL;
        }

        return 0;
}

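/*
 * Debugfs-driven poison injection: send the Inject Poison mailbox
 * command, note the affected region once, and emit a cxl_poison trace
 * event for the injected address.
 */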
int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        struct cxl_mbox_inject_poison inject;
        struct cxl_poison_record record;
        struct cxl_mbox_cmd mbox_cmd;
        struct cxl_region *cxlr;
        int rc;

        if (!IS_ENABLED(CONFIG_DEBUG_FS))
                return 0;

        rc = down_read_interruptible(&cxl_dpa_rwsem);
        if (rc)
                return rc;

        rc = cxl_validate_poison_dpa(cxlmd, dpa);
        if (rc)
                goto out;

        inject.address = cpu_to_le64(dpa);
        mbox_cmd = (struct cxl_mbox_cmd) {
                .opcode = CXL_MBOX_OP_INJECT_POISON,
                .size_in = sizeof(inject),
                .payload_in = &inject,
        };
        rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
        if (rc)
                goto out;

        cxlr = cxl_dpa_to_region(cxlmd, dpa);
        if (cxlr)
                dev_warn_once(cxlds->dev,
                              "poison inject dpa:%#llx region: %s\n", dpa,
                              dev_name(&cxlr->dev));

        record = (struct cxl_poison_record) {
                .address = cpu_to_le64(dpa),
                .length = cpu_to_le32(1),
        };
        trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
out:
        up_read(&cxl_dpa_rwsem);

        return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);

int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        struct cxl_mbox_clear_poison clear;
        struct cxl_poison_record record;
        struct cxl_mbox_cmd mbox_cmd;
        struct cxl_region *cxlr;
        int rc;

        if (!IS_ENABLED(CONFIG_DEBUG_FS))
                return 0;

        rc = down_read_interruptible(&cxl_dpa_rwsem);
        if (rc)
                return rc;

        rc = cxl_validate_poison_dpa(cxlmd, dpa);
        if (rc)
                goto out;

        /*
         * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command
         * is defined to accept 64 bytes of write-data, along with the
         * address to clear. This driver uses zeroes as write-data.
         */
        clear = (struct cxl_mbox_clear_poison) {
                .address = cpu_to_le64(dpa)
        };

        mbox_cmd = (struct cxl_mbox_cmd) {
                .opcode = CXL_MBOX_OP_CLEAR_POISON,
                .size_in = sizeof(clear),
                .payload_in = &clear,
        };

        rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
        if (rc)
                goto out;

        cxlr = cxl_dpa_to_region(cxlmd, dpa);
        if (cxlr)
                dev_warn_once(cxlds->dev, "poison clear dpa:%#llx region: %s\n",
                              dpa, dev_name(&cxlr->dev));

        record = (struct cxl_poison_record) {
                .address = cpu_to_le64(dpa),
                .length = cpu_to_le32(1),
        };
        trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
out:
        up_read(&cxl_dpa_rwsem);

        return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, CXL);

static struct attribute *cxl_memdev_attributes[] = {
        &dev_attr_serial.attr,
        &dev_attr_firmware_version.attr,
        &dev_attr_payload_max.attr,
        &dev_attr_label_storage_size.attr,
        &dev_attr_numa_node.attr,
        NULL,
};

static struct attribute *cxl_memdev_pmem_attributes[] = {
        &dev_attr_pmem_size.attr,
        NULL,
};

static struct attribute *cxl_memdev_ram_attributes[] = {
        &dev_attr_ram_size.attr,
        NULL,
};

static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a,
                                  int n)
{
        if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr)
                return 0;
        return a->mode;
}

static struct attribute_group cxl_memdev_attribute_group = {
        .attrs = cxl_memdev_attributes,
        .is_visible = cxl_memdev_visible,
};

static struct attribute_group cxl_memdev_ram_attribute_group = {
        .name = "ram",
        .attrs = cxl_memdev_ram_attributes,
};

static struct attribute_group cxl_memdev_pmem_attribute_group = {
        .name = "pmem",
        .attrs = cxl_memdev_pmem_attributes,
};

static const struct attribute_group *cxl_memdev_attribute_groups[] = {
        &cxl_memdev_attribute_group,
        &cxl_memdev_ram_attribute_group,
        &cxl_memdev_pmem_attribute_group,
        NULL,
};

static const struct device_type cxl_memdev_type = {
        .name = "cxl_memdev",
        .release = cxl_memdev_release,
        .devnode = cxl_memdev_devnode,
        .groups = cxl_memdev_attribute_groups,
};

bool is_cxl_memdev(const struct device *dev)
{
        return dev->type == &cxl_memdev_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);

/**
 * set_exclusive_cxl_commands() - atomically disable user cxl commands
 * @cxlds: The device state to operate on
 * @cmds: bitmap of commands to mark exclusive
 *
 * Grab the cxl_memdev_rwsem in write mode to flush in-flight
 * invocations of the ioctl path and then disable future execution of
 * commands with the command ids set in @cmds.
 */
void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
{
        down_write(&cxl_memdev_rwsem);
        bitmap_or(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
                  CXL_MEM_COMMAND_ID_MAX);
        up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, CXL);

/**
 * clear_exclusive_cxl_commands() - atomically enable user cxl commands
 * @cxlds: The device state to modify
 * @cmds: bitmap of commands to mark available for userspace
 */
void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
{
        down_write(&cxl_memdev_rwsem);
        bitmap_andnot(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
                      CXL_MEM_COMMAND_ID_MAX);
        up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);

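/*
 * Sever the memdev's back-pointer to the device state under the write
 * lock so that subsequent ioctl callers see a dead device.
 */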
static void cxl_memdev_shutdown(struct device *dev)
{
        struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

        down_write(&cxl_memdev_rwsem);
        cxlmd->cxlds = NULL;
        up_write(&cxl_memdev_rwsem);
}

static void cxl_memdev_unregister(void *_cxlmd)
{
        struct cxl_memdev *cxlmd = _cxlmd;
        struct device *dev = &cxlmd->dev;

        cxl_memdev_shutdown(dev);
        cdev_device_del(&cxlmd->cdev, dev);
        put_device(dev);
}

static void detach_memdev(struct work_struct *work)
{
        struct cxl_memdev *cxlmd;

        cxlmd = container_of(work, typeof(*cxlmd), detach_work);
        device_release_driver(&cxlmd->dev);
        put_device(&cxlmd->dev);
}

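/* Give cxl_memdev device locks their own lockdep class */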
static struct lock_class_key cxl_memdev_key;

static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
                                           const struct file_operations *fops)
{
        struct cxl_memdev *cxlmd;
        struct device *dev;
        struct cdev *cdev;
        int rc;

        cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
        if (!cxlmd)
                return ERR_PTR(-ENOMEM);

        rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL);
        if (rc < 0)
                goto err;
        cxlmd->id = rc;

        dev = &cxlmd->dev;
        device_initialize(dev);
        lockdep_set_class(&dev->mutex, &cxl_memdev_key);
        dev->parent = cxlds->dev;
        dev->bus = &cxl_bus_type;
        dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
        dev->type = &cxl_memdev_type;
        device_set_pm_not_required(dev);
        INIT_WORK(&cxlmd->detach_work, detach_memdev);

        cdev = &cxlmd->cdev;
        cdev_init(cdev, fops);
        return cxlmd;

err:
        kfree(cxlmd);
        return ERR_PTR(rc);
}

static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
                               unsigned long arg)
{
        switch (cmd) {
        case CXL_MEM_QUERY_COMMANDS:
                return cxl_query_cmd(cxlmd, (void __user *)arg);
        case CXL_MEM_SEND_COMMAND:
                return cxl_send_cmd(cxlmd, (void __user *)arg);
        default:
                return -ENOTTY;
        }
}

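/*
 * Ioctls run under the read side of cxl_memdev_rwsem; exclusive-command
 * updates and memdev shutdown take it for write to flush in-flight
 * callers. A cleared cxlds pointer means the device is gone.
 */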
static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
                             unsigned long arg)
{
        struct cxl_memdev *cxlmd = file->private_data;
        int rc = -ENXIO;

        down_read(&cxl_memdev_rwsem);
        if (cxlmd->cxlds)
                rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
        up_read(&cxl_memdev_rwsem);

        return rc;
}

static int cxl_memdev_open(struct inode *inode, struct file *file)
{
        struct cxl_memdev *cxlmd =
                container_of(inode->i_cdev, typeof(*cxlmd), cdev);

        get_device(&cxlmd->dev);
        file->private_data = cxlmd;

        return 0;
}

static int cxl_memdev_release_file(struct inode *inode, struct file *file)
{
        struct cxl_memdev *cxlmd =
                container_of(inode->i_cdev, typeof(*cxlmd), cdev);

        put_device(&cxlmd->dev);

        return 0;
}

static const struct file_operations cxl_memdev_fops = {
        .owner = THIS_MODULE,
        .unlocked_ioctl = cxl_memdev_ioctl,
        .open = cxl_memdev_open,
        .release = cxl_memdev_release_file,
        .compat_ioctl = compat_ptr_ioctl,
        .llseek = noop_llseek,
};

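/*
 * Allocate, name, and publish the memdev character device; teardown is
 * registered against the parent driver device via devm.
 */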
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
{
        struct cxl_memdev *cxlmd;
        struct device *dev;
        struct cdev *cdev;
        int rc;

        cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
        if (IS_ERR(cxlmd))
                return cxlmd;

        dev = &cxlmd->dev;
        rc = dev_set_name(dev, "mem%d", cxlmd->id);
        if (rc)
                goto err;

        /*
         * Activate ioctl operations, no cxl_memdev_rwsem manipulation
         * needed as this is ordered with cdev_add() publishing the device.
         */
        cxlmd->cxlds = cxlds;
        cxlds->cxlmd = cxlmd;

        cdev = &cxlmd->cdev;
        rc = cdev_device_add(cdev, dev);
        if (rc)
                goto err;

        rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
        if (rc)
                return ERR_PTR(rc);
        return cxlmd;

err:
        /*
         * The cdev was briefly live, shutdown any ioctl operations that
         * saw that state.
         */
        cxl_memdev_shutdown(dev);
        put_device(dev);
        return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);

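/* Reserve a dynamic char device region with CXL_MEM_MAX_DEVS minors */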
__init int cxl_memdev_init(void)
{
        dev_t devt;
        int rc;

        rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
        if (rc)
                return rc;

        cxl_mem_major = MAJOR(devt);

        return 0;
}

void cxl_memdev_exit(void)
{
        unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
}