]> Git Repo - linux.git/blob - drivers/dax/bus.c
Linux 6.14-rc3
[linux.git] / drivers / dax / bus.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
3 #include <linux/memremap.h>
4 #include <linux/device.h>
5 #include <linux/mutex.h>
6 #include <linux/list.h>
7 #include <linux/slab.h>
8 #include <linux/dax.h>
9 #include <linux/io.h>
10 #include "dax-private.h"
11 #include "bus.h"
12
13 static DEFINE_MUTEX(dax_bus_lock);
14
15 /*
16  * All changes to the dax region configuration occur with this lock held
17  * for write.
18  */
19 DECLARE_RWSEM(dax_region_rwsem);
20
21 /*
22  * All changes to the dax device configuration occur with this lock held
23  * for write.
24  */
25 DECLARE_RWSEM(dax_dev_rwsem);
26
27 #define DAX_NAME_LEN 30
28 struct dax_id {
29         struct list_head list;
30         char dev_name[DAX_NAME_LEN];
31 };
32
33 static int dax_bus_uevent(const struct device *dev, struct kobj_uevent_env *env)
34 {
35         /*
36          * We only ever expect to handle device-dax instances, i.e. the
37          * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
38          */
39         return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
40 }
41
42 #define to_dax_drv(__drv)       container_of_const(__drv, struct dax_device_driver, drv)
43
44 static struct dax_id *__dax_match_id(const struct dax_device_driver *dax_drv,
45                 const char *dev_name)
46 {
47         struct dax_id *dax_id;
48
49         lockdep_assert_held(&dax_bus_lock);
50
51         list_for_each_entry(dax_id, &dax_drv->ids, list)
52                 if (sysfs_streq(dax_id->dev_name, dev_name))
53                         return dax_id;
54         return NULL;
55 }
56
57 static int dax_match_id(const struct dax_device_driver *dax_drv, struct device *dev)
58 {
59         int match;
60
61         mutex_lock(&dax_bus_lock);
62         match = !!__dax_match_id(dax_drv, dev_name(dev));
63         mutex_unlock(&dax_bus_lock);
64
65         return match;
66 }
67
68 static int dax_match_type(const struct dax_device_driver *dax_drv, struct device *dev)
69 {
70         enum dax_driver_type type = DAXDRV_DEVICE_TYPE;
71         struct dev_dax *dev_dax = to_dev_dax(dev);
72
73         if (dev_dax->region->res.flags & IORESOURCE_DAX_KMEM)
74                 type = DAXDRV_KMEM_TYPE;
75
76         if (dax_drv->type == type)
77                 return 1;
78
79         /* default to device mode if dax_kmem is disabled */
80         if (dax_drv->type == DAXDRV_DEVICE_TYPE &&
81             !IS_ENABLED(CONFIG_DEV_DAX_KMEM))
82                 return 1;
83
84         return 0;
85 }
86
87 enum id_action {
88         ID_REMOVE,
89         ID_ADD,
90 };
91
92 static ssize_t do_id_store(struct device_driver *drv, const char *buf,
93                 size_t count, enum id_action action)
94 {
95         struct dax_device_driver *dax_drv = to_dax_drv(drv);
96         unsigned int region_id, id;
97         char devname[DAX_NAME_LEN];
98         struct dax_id *dax_id;
99         ssize_t rc = count;
100         int fields;
101
102         fields = sscanf(buf, "dax%d.%d", &region_id, &id);
103         if (fields != 2)
104                 return -EINVAL;
105         sprintf(devname, "dax%d.%d", region_id, id);
106         if (!sysfs_streq(buf, devname))
107                 return -EINVAL;
108
109         mutex_lock(&dax_bus_lock);
110         dax_id = __dax_match_id(dax_drv, buf);
111         if (!dax_id) {
112                 if (action == ID_ADD) {
113                         dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
114                         if (dax_id) {
115                                 strscpy(dax_id->dev_name, buf, DAX_NAME_LEN);
116                                 list_add(&dax_id->list, &dax_drv->ids);
117                         } else
118                                 rc = -ENOMEM;
119                 }
120         } else if (action == ID_REMOVE) {
121                 list_del(&dax_id->list);
122                 kfree(dax_id);
123         }
124         mutex_unlock(&dax_bus_lock);
125
126         if (rc < 0)
127                 return rc;
128         if (action == ID_ADD)
129                 rc = driver_attach(drv);
130         if (rc)
131                 return rc;
132         return count;
133 }
134
135 static ssize_t new_id_store(struct device_driver *drv, const char *buf,
136                 size_t count)
137 {
138         return do_id_store(drv, buf, count, ID_ADD);
139 }
140 static DRIVER_ATTR_WO(new_id);
141
142 static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
143                 size_t count)
144 {
145         return do_id_store(drv, buf, count, ID_REMOVE);
146 }
147 static DRIVER_ATTR_WO(remove_id);
148
149 static struct attribute *dax_drv_attrs[] = {
150         &driver_attr_new_id.attr,
151         &driver_attr_remove_id.attr,
152         NULL,
153 };
154 ATTRIBUTE_GROUPS(dax_drv);
155
156 static int dax_bus_match(struct device *dev, const struct device_driver *drv);
157
158 /*
159  * Static dax regions are regions created by an external subsystem
160  * nvdimm where a single range is assigned. Its boundaries are by the external
161  * subsystem and are usually limited to one physical memory range. For example,
162  * for PMEM it is usually defined by NVDIMM Namespace boundaries (i.e. a
163  * single contiguous range)
164  *
165  * On dynamic dax regions, the assigned region can be partitioned by dax core
166  * into multiple subdivisions. A subdivision is represented into one
167  * /dev/daxN.M device composed by one or more potentially discontiguous ranges.
168  *
169  * When allocating a dax region, drivers must set whether it's static
170  * (IORESOURCE_DAX_STATIC).  On static dax devices, the @pgmap is pre-assigned
171  * to dax core when calling devm_create_dev_dax(), whereas in dynamic dax
172  * devices it is NULL but afterwards allocated by dax core on device ->probe().
173  * Care is needed to make sure that dynamic dax devices are torn down with a
174  * cleared @pgmap field (see kill_dev_dax()).
175  */
176 static bool is_static(struct dax_region *dax_region)
177 {
178         return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
179 }
180
181 bool static_dev_dax(struct dev_dax *dev_dax)
182 {
183         return is_static(dev_dax->region);
184 }
185 EXPORT_SYMBOL_GPL(static_dev_dax);
186
187 static u64 dev_dax_size(struct dev_dax *dev_dax)
188 {
189         u64 size = 0;
190         int i;
191
192         lockdep_assert_held(&dax_dev_rwsem);
193
194         for (i = 0; i < dev_dax->nr_range; i++)
195                 size += range_len(&dev_dax->ranges[i].range);
196
197         return size;
198 }
199
200 static int dax_bus_probe(struct device *dev)
201 {
202         struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
203         struct dev_dax *dev_dax = to_dev_dax(dev);
204         struct dax_region *dax_region = dev_dax->region;
205         int rc;
206         u64 size;
207
208         rc = down_read_interruptible(&dax_dev_rwsem);
209         if (rc)
210                 return rc;
211         size = dev_dax_size(dev_dax);
212         up_read(&dax_dev_rwsem);
213
214         if (size == 0 || dev_dax->id < 0)
215                 return -ENXIO;
216
217         rc = dax_drv->probe(dev_dax);
218
219         if (rc || is_static(dax_region))
220                 return rc;
221
222         /*
223          * Track new seed creation only after successful probe of the
224          * previous seed.
225          */
226         if (dax_region->seed == dev)
227                 dax_region->seed = NULL;
228
229         return 0;
230 }
231
232 static void dax_bus_remove(struct device *dev)
233 {
234         struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
235         struct dev_dax *dev_dax = to_dev_dax(dev);
236
237         if (dax_drv->remove)
238                 dax_drv->remove(dev_dax);
239 }
240
241 static const struct bus_type dax_bus_type = {
242         .name = "dax",
243         .uevent = dax_bus_uevent,
244         .match = dax_bus_match,
245         .probe = dax_bus_probe,
246         .remove = dax_bus_remove,
247         .drv_groups = dax_drv_groups,
248 };
249
/*
 * Bus ->match(): a device name on the driver's id list always matches;
 * otherwise the result of the driver-type comparison decides.
 */
static int dax_bus_match(struct device *dev, const struct device_driver *drv)
{
	const struct dax_device_driver *dax_drv = to_dax_drv(drv);

	return dax_match_id(dax_drv, dev) ? 1 : dax_match_type(dax_drv, dev);
}
258
259 /*
260  * Rely on the fact that drvdata is set before the attributes are
261  * registered, and that the attributes are unregistered before drvdata
262  * is cleared to assume that drvdata is always valid.
263  */
264 static ssize_t id_show(struct device *dev,
265                 struct device_attribute *attr, char *buf)
266 {
267         struct dax_region *dax_region = dev_get_drvdata(dev);
268
269         return sysfs_emit(buf, "%d\n", dax_region->id);
270 }
271 static DEVICE_ATTR_RO(id);
272
273 static ssize_t region_size_show(struct device *dev,
274                 struct device_attribute *attr, char *buf)
275 {
276         struct dax_region *dax_region = dev_get_drvdata(dev);
277
278         return sysfs_emit(buf, "%llu\n",
279                           (unsigned long long)resource_size(&dax_region->res));
280 }
281 static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
282                 region_size_show, NULL);
283
284 static ssize_t region_align_show(struct device *dev,
285                 struct device_attribute *attr, char *buf)
286 {
287         struct dax_region *dax_region = dev_get_drvdata(dev);
288
289         return sysfs_emit(buf, "%u\n", dax_region->align);
290 }
291 static struct device_attribute dev_attr_region_align =
292                 __ATTR(align, 0400, region_align_show, NULL);
293
294 #define for_each_dax_region_resource(dax_region, res) \
295         for (res = (dax_region)->res.child; res; res = res->sibling)
296
297 static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
298 {
299         resource_size_t size = resource_size(&dax_region->res);
300         struct resource *res;
301
302         lockdep_assert_held(&dax_region_rwsem);
303
304         for_each_dax_region_resource(dax_region, res)
305                 size -= resource_size(res);
306         return size;
307 }
308
309 static ssize_t available_size_show(struct device *dev,
310                 struct device_attribute *attr, char *buf)
311 {
312         struct dax_region *dax_region = dev_get_drvdata(dev);
313         unsigned long long size;
314         int rc;
315
316         rc = down_read_interruptible(&dax_region_rwsem);
317         if (rc)
318                 return rc;
319         size = dax_region_avail_size(dax_region);
320         up_read(&dax_region_rwsem);
321
322         return sysfs_emit(buf, "%llu\n", size);
323 }
324 static DEVICE_ATTR_RO(available_size);
325
326 static ssize_t seed_show(struct device *dev,
327                 struct device_attribute *attr, char *buf)
328 {
329         struct dax_region *dax_region = dev_get_drvdata(dev);
330         struct device *seed;
331         ssize_t rc;
332
333         if (is_static(dax_region))
334                 return -EINVAL;
335
336         rc = down_read_interruptible(&dax_region_rwsem);
337         if (rc)
338                 return rc;
339         seed = dax_region->seed;
340         rc = sysfs_emit(buf, "%s\n", seed ? dev_name(seed) : "");
341         up_read(&dax_region_rwsem);
342
343         return rc;
344 }
345 static DEVICE_ATTR_RO(seed);
346
347 static ssize_t create_show(struct device *dev,
348                 struct device_attribute *attr, char *buf)
349 {
350         struct dax_region *dax_region = dev_get_drvdata(dev);
351         struct device *youngest;
352         ssize_t rc;
353
354         if (is_static(dax_region))
355                 return -EINVAL;
356
357         rc = down_read_interruptible(&dax_region_rwsem);
358         if (rc)
359                 return rc;
360         youngest = dax_region->youngest;
361         rc = sysfs_emit(buf, "%s\n", youngest ? dev_name(youngest) : "");
362         up_read(&dax_region_rwsem);
363
364         return rc;
365 }
366
367 static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data);
368
369 static ssize_t create_store(struct device *dev, struct device_attribute *attr,
370                 const char *buf, size_t len)
371 {
372         struct dax_region *dax_region = dev_get_drvdata(dev);
373         unsigned long long avail;
374         ssize_t rc;
375         int val;
376
377         if (is_static(dax_region))
378                 return -EINVAL;
379
380         rc = kstrtoint(buf, 0, &val);
381         if (rc)
382                 return rc;
383         if (val != 1)
384                 return -EINVAL;
385
386         rc = down_write_killable(&dax_region_rwsem);
387         if (rc)
388                 return rc;
389         avail = dax_region_avail_size(dax_region);
390         if (avail == 0)
391                 rc = -ENOSPC;
392         else {
393                 struct dev_dax_data data = {
394                         .dax_region = dax_region,
395                         .size = 0,
396                         .id = -1,
397                         .memmap_on_memory = false,
398                 };
399                 struct dev_dax *dev_dax = __devm_create_dev_dax(&data);
400
401                 if (IS_ERR(dev_dax))
402                         rc = PTR_ERR(dev_dax);
403                 else {
404                         /*
405                          * In support of crafting multiple new devices
406                          * simultaneously multiple seeds can be created,
407                          * but only the first one that has not been
408                          * successfully bound is tracked as the region
409                          * seed.
410                          */
411                         if (!dax_region->seed)
412                                 dax_region->seed = &dev_dax->dev;
413                         dax_region->youngest = &dev_dax->dev;
414                         rc = len;
415                 }
416         }
417         up_write(&dax_region_rwsem);
418
419         return rc;
420 }
421 static DEVICE_ATTR_RW(create);
422
423 void kill_dev_dax(struct dev_dax *dev_dax)
424 {
425         struct dax_device *dax_dev = dev_dax->dax_dev;
426         struct inode *inode = dax_inode(dax_dev);
427
428         kill_dax(dax_dev);
429         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
430
431         /*
432          * Dynamic dax region have the pgmap allocated via dev_kzalloc()
433          * and thus freed by devm. Clear the pgmap to not have stale pgmap
434          * ranges on probe() from previous reconfigurations of region devices.
435          */
436         if (!static_dev_dax(dev_dax))
437                 dev_dax->pgmap = NULL;
438 }
439 EXPORT_SYMBOL_GPL(kill_dev_dax);
440
441 static void trim_dev_dax_range(struct dev_dax *dev_dax)
442 {
443         int i = dev_dax->nr_range - 1;
444         struct range *range = &dev_dax->ranges[i].range;
445         struct dax_region *dax_region = dev_dax->region;
446
447         lockdep_assert_held_write(&dax_region_rwsem);
448         dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
449                 (unsigned long long)range->start,
450                 (unsigned long long)range->end);
451
452         __release_region(&dax_region->res, range->start, range_len(range));
453         if (--dev_dax->nr_range == 0) {
454                 kfree(dev_dax->ranges);
455                 dev_dax->ranges = NULL;
456         }
457 }
458
459 static void free_dev_dax_ranges(struct dev_dax *dev_dax)
460 {
461         while (dev_dax->nr_range)
462                 trim_dev_dax_range(dev_dax);
463 }
464
465 static void unregister_dev_dax(void *dev)
466 {
467         struct dev_dax *dev_dax = to_dev_dax(dev);
468
469         dev_dbg(dev, "%s\n", __func__);
470
471         down_write(&dax_region_rwsem);
472         kill_dev_dax(dev_dax);
473         device_del(dev);
474         free_dev_dax_ranges(dev_dax);
475         put_device(dev);
476         up_write(&dax_region_rwsem);
477 }
478
479 static void dax_region_free(struct kref *kref)
480 {
481         struct dax_region *dax_region;
482
483         dax_region = container_of(kref, struct dax_region, kref);
484         kfree(dax_region);
485 }
486
487 static void dax_region_put(struct dax_region *dax_region)
488 {
489         kref_put(&dax_region->kref, dax_region_free);
490 }
491
492 /* a return value >= 0 indicates this invocation invalidated the id */
493 static int __free_dev_dax_id(struct dev_dax *dev_dax)
494 {
495         struct dax_region *dax_region;
496         int rc = dev_dax->id;
497
498         lockdep_assert_held_write(&dax_dev_rwsem);
499
500         if (!dev_dax->dyn_id || dev_dax->id < 0)
501                 return -1;
502         dax_region = dev_dax->region;
503         ida_free(&dax_region->ida, dev_dax->id);
504         dax_region_put(dax_region);
505         dev_dax->id = -1;
506         return rc;
507 }
508
509 static int free_dev_dax_id(struct dev_dax *dev_dax)
510 {
511         int rc;
512
513         rc = down_write_killable(&dax_dev_rwsem);
514         if (rc)
515                 return rc;
516         rc = __free_dev_dax_id(dev_dax);
517         up_write(&dax_dev_rwsem);
518         return rc;
519 }
520
521 static int alloc_dev_dax_id(struct dev_dax *dev_dax)
522 {
523         struct dax_region *dax_region = dev_dax->region;
524         int id;
525
526         id = ida_alloc(&dax_region->ida, GFP_KERNEL);
527         if (id < 0)
528                 return id;
529         kref_get(&dax_region->kref);
530         dev_dax->dyn_id = true;
531         dev_dax->id = id;
532         return id;
533 }
534
535 static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
536                 const char *buf, size_t len)
537 {
538         struct dax_region *dax_region = dev_get_drvdata(dev);
539         struct dev_dax *dev_dax;
540         struct device *victim;
541         bool do_del = false;
542         int rc;
543
544         if (is_static(dax_region))
545                 return -EINVAL;
546
547         victim = device_find_child_by_name(dax_region->dev, buf);
548         if (!victim)
549                 return -ENXIO;
550
551         device_lock(dev);
552         device_lock(victim);
553         dev_dax = to_dev_dax(victim);
554         down_write(&dax_dev_rwsem);
555         if (victim->driver || dev_dax_size(dev_dax))
556                 rc = -EBUSY;
557         else {
558                 /*
559                  * Invalidate the device so it does not become active
560                  * again, but always preserve device-id-0 so that
561                  * /sys/bus/dax/ is guaranteed to be populated while any
562                  * dax_region is registered.
563                  */
564                 if (dev_dax->id > 0) {
565                         do_del = __free_dev_dax_id(dev_dax) >= 0;
566                         rc = len;
567                         if (dax_region->seed == victim)
568                                 dax_region->seed = NULL;
569                         if (dax_region->youngest == victim)
570                                 dax_region->youngest = NULL;
571                 } else
572                         rc = -EBUSY;
573         }
574         up_write(&dax_dev_rwsem);
575         device_unlock(victim);
576
577         /* won the race to invalidate the device, clean it up */
578         if (do_del)
579                 devm_release_action(dev, unregister_dev_dax, victim);
580         device_unlock(dev);
581         put_device(victim);
582
583         return rc;
584 }
585 static DEVICE_ATTR_WO(delete);
586
587 static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a,
588                 int n)
589 {
590         struct device *dev = container_of(kobj, struct device, kobj);
591         struct dax_region *dax_region = dev_get_drvdata(dev);
592
593         if (is_static(dax_region))
594                 if (a == &dev_attr_available_size.attr
595                                 || a == &dev_attr_create.attr
596                                 || a == &dev_attr_seed.attr
597                                 || a == &dev_attr_delete.attr)
598                         return 0;
599         return a->mode;
600 }
601
602 static struct attribute *dax_region_attributes[] = {
603         &dev_attr_available_size.attr,
604         &dev_attr_region_size.attr,
605         &dev_attr_region_align.attr,
606         &dev_attr_create.attr,
607         &dev_attr_seed.attr,
608         &dev_attr_delete.attr,
609         &dev_attr_id.attr,
610         NULL,
611 };
612
613 static const struct attribute_group dax_region_attribute_group = {
614         .name = "dax_region",
615         .attrs = dax_region_attributes,
616         .is_visible = dax_region_visible,
617 };
618
619 static const struct attribute_group *dax_region_attribute_groups[] = {
620         &dax_region_attribute_group,
621         NULL,
622 };
623
624 static void dax_region_unregister(void *region)
625 {
626         struct dax_region *dax_region = region;
627
628         sysfs_remove_groups(&dax_region->dev->kobj,
629                         dax_region_attribute_groups);
630         dax_region_put(dax_region);
631 }
632
633 struct dax_region *alloc_dax_region(struct device *parent, int region_id,
634                 struct range *range, int target_node, unsigned int align,
635                 unsigned long flags)
636 {
637         struct dax_region *dax_region;
638
639         /*
640          * The DAX core assumes that it can store its private data in
641          * parent->driver_data. This WARN is a reminder / safeguard for
642          * developers of device-dax drivers.
643          */
644         if (dev_get_drvdata(parent)) {
645                 dev_WARN(parent, "dax core failed to setup private data\n");
646                 return NULL;
647         }
648
649         if (!IS_ALIGNED(range->start, align)
650                         || !IS_ALIGNED(range_len(range), align))
651                 return NULL;
652
653         dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
654         if (!dax_region)
655                 return NULL;
656
657         dev_set_drvdata(parent, dax_region);
658         kref_init(&dax_region->kref);
659         dax_region->id = region_id;
660         dax_region->align = align;
661         dax_region->dev = parent;
662         dax_region->target_node = target_node;
663         ida_init(&dax_region->ida);
664         dax_region->res = (struct resource) {
665                 .start = range->start,
666                 .end = range->end,
667                 .flags = IORESOURCE_MEM | flags,
668         };
669
670         if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
671                 kfree(dax_region);
672                 return NULL;
673         }
674
675         if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
676                 return NULL;
677         return dax_region;
678 }
679 EXPORT_SYMBOL_GPL(alloc_dax_region);
680
681 static void dax_mapping_release(struct device *dev)
682 {
683         struct dax_mapping *mapping = to_dax_mapping(dev);
684         struct device *parent = dev->parent;
685         struct dev_dax *dev_dax = to_dev_dax(parent);
686
687         ida_free(&dev_dax->ida, mapping->id);
688         kfree(mapping);
689         put_device(parent);
690 }
691
692 static void unregister_dax_mapping(void *data)
693 {
694         struct device *dev = data;
695         struct dax_mapping *mapping = to_dax_mapping(dev);
696         struct dev_dax *dev_dax = to_dev_dax(dev->parent);
697
698         dev_dbg(dev, "%s\n", __func__);
699
700         dev_dax->ranges[mapping->range_id].mapping = NULL;
701         mapping->range_id = -1;
702
703         device_unregister(dev);
704 }
705
706 static struct dev_dax_range *get_dax_range(struct device *dev)
707 {
708         struct dax_mapping *mapping = to_dax_mapping(dev);
709         struct dev_dax *dev_dax = to_dev_dax(dev->parent);
710         int rc;
711
712         rc = down_write_killable(&dax_region_rwsem);
713         if (rc)
714                 return NULL;
715         if (mapping->range_id < 0) {
716                 up_write(&dax_region_rwsem);
717                 return NULL;
718         }
719
720         return &dev_dax->ranges[mapping->range_id];
721 }
722
723 static void put_dax_range(void)
724 {
725         up_write(&dax_region_rwsem);
726 }
727
728 static ssize_t start_show(struct device *dev,
729                 struct device_attribute *attr, char *buf)
730 {
731         struct dev_dax_range *dax_range;
732         ssize_t rc;
733
734         dax_range = get_dax_range(dev);
735         if (!dax_range)
736                 return -ENXIO;
737         rc = sysfs_emit(buf, "%#llx\n", dax_range->range.start);
738         put_dax_range();
739
740         return rc;
741 }
742 static DEVICE_ATTR(start, 0400, start_show, NULL);
743
744 static ssize_t end_show(struct device *dev,
745                 struct device_attribute *attr, char *buf)
746 {
747         struct dev_dax_range *dax_range;
748         ssize_t rc;
749
750         dax_range = get_dax_range(dev);
751         if (!dax_range)
752                 return -ENXIO;
753         rc = sysfs_emit(buf, "%#llx\n", dax_range->range.end);
754         put_dax_range();
755
756         return rc;
757 }
758 static DEVICE_ATTR(end, 0400, end_show, NULL);
759
760 static ssize_t pgoff_show(struct device *dev,
761                 struct device_attribute *attr, char *buf)
762 {
763         struct dev_dax_range *dax_range;
764         ssize_t rc;
765
766         dax_range = get_dax_range(dev);
767         if (!dax_range)
768                 return -ENXIO;
769         rc = sysfs_emit(buf, "%#lx\n", dax_range->pgoff);
770         put_dax_range();
771
772         return rc;
773 }
774 static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL);
775
776 static struct attribute *dax_mapping_attributes[] = {
777         &dev_attr_start.attr,
778         &dev_attr_end.attr,
779         &dev_attr_page_offset.attr,
780         NULL,
781 };
782
783 static const struct attribute_group dax_mapping_attribute_group = {
784         .attrs = dax_mapping_attributes,
785 };
786
787 static const struct attribute_group *dax_mapping_attribute_groups[] = {
788         &dax_mapping_attribute_group,
789         NULL,
790 };
791
792 static const struct device_type dax_mapping_type = {
793         .release = dax_mapping_release,
794         .groups = dax_mapping_attribute_groups,
795 };
796
797 static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
798 {
799         struct dax_region *dax_region = dev_dax->region;
800         struct dax_mapping *mapping;
801         struct device *dev;
802         int rc;
803
804         lockdep_assert_held_write(&dax_region_rwsem);
805
806         if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
807                                 "region disabled\n"))
808                 return -ENXIO;
809
810         mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
811         if (!mapping)
812                 return -ENOMEM;
813         mapping->range_id = range_id;
814         mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL);
815         if (mapping->id < 0) {
816                 kfree(mapping);
817                 return -ENOMEM;
818         }
819         dev_dax->ranges[range_id].mapping = mapping;
820         dev = &mapping->dev;
821         device_initialize(dev);
822         dev->parent = &dev_dax->dev;
823         get_device(dev->parent);
824         dev->type = &dax_mapping_type;
825         dev_set_name(dev, "mapping%d", mapping->id);
826         rc = device_add(dev);
827         if (rc) {
828                 put_device(dev);
829                 return rc;
830         }
831
832         rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping,
833                         dev);
834         if (rc)
835                 return rc;
836         return 0;
837 }
838
839 static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
840                 resource_size_t size)
841 {
842         struct dax_region *dax_region = dev_dax->region;
843         struct resource *res = &dax_region->res;
844         struct device *dev = &dev_dax->dev;
845         struct dev_dax_range *ranges;
846         unsigned long pgoff = 0;
847         struct resource *alloc;
848         int i, rc;
849
850         lockdep_assert_held_write(&dax_region_rwsem);
851
852         /* handle the seed alloc special case */
853         if (!size) {
854                 if (dev_WARN_ONCE(dev, dev_dax->nr_range,
855                                         "0-size allocation must be first\n"))
856                         return -EBUSY;
857                 /* nr_range == 0 is elsewhere special cased as 0-size device */
858                 return 0;
859         }
860
861         alloc = __request_region(res, start, size, dev_name(dev), 0);
862         if (!alloc)
863                 return -ENOMEM;
864
865         ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
866                         * (dev_dax->nr_range + 1), GFP_KERNEL);
867         if (!ranges) {
868                 __release_region(res, alloc->start, resource_size(alloc));
869                 return -ENOMEM;
870         }
871
872         for (i = 0; i < dev_dax->nr_range; i++)
873                 pgoff += PHYS_PFN(range_len(&ranges[i].range));
874         dev_dax->ranges = ranges;
875         ranges[dev_dax->nr_range++] = (struct dev_dax_range) {
876                 .pgoff = pgoff,
877                 .range = {
878                         .start = alloc->start,
879                         .end = alloc->end,
880                 },
881         };
882
883         dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
884                         &alloc->start, &alloc->end);
885         /*
886          * A dev_dax instance must be registered before mapping device
887          * children can be added. Defer to devm_create_dev_dax() to add
888          * the initial mapping device.
889          */
890         if (!device_is_registered(&dev_dax->dev))
891                 return 0;
892
893         rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1);
894         if (rc)
895                 trim_dev_dax_range(dev_dax);
896
897         return rc;
898 }
899
900 static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
901 {
902         int last_range = dev_dax->nr_range - 1;
903         struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
904         bool is_shrink = resource_size(res) > size;
905         struct range *range = &dax_range->range;
906         struct device *dev = &dev_dax->dev;
907         int rc;
908
909         lockdep_assert_held_write(&dax_region_rwsem);
910
911         if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
912                 return -EINVAL;
913
914         rc = adjust_resource(res, range->start, size);
915         if (rc)
916                 return rc;
917
918         *range = (struct range) {
919                 .start = range->start,
920                 .end = range->start + size - 1,
921         };
922
923         dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend",
924                         last_range, (unsigned long long) range->start,
925                         (unsigned long long) range->end);
926
927         return 0;
928 }
929
930 static ssize_t size_show(struct device *dev,
931                 struct device_attribute *attr, char *buf)
932 {
933         struct dev_dax *dev_dax = to_dev_dax(dev);
934         unsigned long long size;
935         int rc;
936
937         rc = down_read_interruptible(&dax_dev_rwsem);
938         if (rc)
939                 return rc;
940         size = dev_dax_size(dev_dax);
941         up_read(&dax_dev_rwsem);
942
943         return sysfs_emit(buf, "%llu\n", size);
944 }
945
946 static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size)
947 {
948         /*
949          * The minimum mapping granularity for a device instance is a
950          * single subsection, unless the arch says otherwise.
951          */
952         return IS_ALIGNED(size, max_t(unsigned long, dev_dax->align, memremap_compat_align()));
953 }
954
955 static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
956 {
957         resource_size_t to_shrink = dev_dax_size(dev_dax) - size;
958         struct dax_region *dax_region = dev_dax->region;
959         struct device *dev = &dev_dax->dev;
960         int i;
961
962         for (i = dev_dax->nr_range - 1; i >= 0; i--) {
963                 struct range *range = &dev_dax->ranges[i].range;
964                 struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
965                 struct resource *adjust = NULL, *res;
966                 resource_size_t shrink;
967
968                 shrink = min_t(u64, to_shrink, range_len(range));
969                 if (shrink >= range_len(range)) {
970                         devm_release_action(dax_region->dev,
971                                         unregister_dax_mapping, &mapping->dev);
972                         trim_dev_dax_range(dev_dax);
973                         to_shrink -= shrink;
974                         if (!to_shrink)
975                                 break;
976                         continue;
977                 }
978
979                 for_each_dax_region_resource(dax_region, res)
980                         if (strcmp(res->name, dev_name(dev)) == 0
981                                         && res->start == range->start) {
982                                 adjust = res;
983                                 break;
984                         }
985
986                 if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
987                                         "failed to find matching resource\n"))
988                         return -ENXIO;
989                 return adjust_dev_dax_range(dev_dax, adjust, range_len(range)
990                                 - shrink);
991         }
992         return 0;
993 }
994
995 /*
996  * Only allow adjustments that preserve the relative pgoff of existing
997  * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff.
998  */
999 static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
1000 {
1001         struct dev_dax_range *last;
1002         int i;
1003
1004         if (dev_dax->nr_range == 0)
1005                 return false;
1006         if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0)
1007                 return false;
1008         last = &dev_dax->ranges[dev_dax->nr_range - 1];
1009         if (last->range.start != res->start || last->range.end != res->end)
1010                 return false;
1011         for (i = 0; i < dev_dax->nr_range - 1; i++) {
1012                 struct dev_dax_range *dax_range = &dev_dax->ranges[i];
1013
1014                 if (dax_range->pgoff > last->pgoff)
1015                         return false;
1016         }
1017
1018         return true;
1019 }
1020
/*
 * dev_dax_resize() - grow or shrink @dev_dax to @size bytes inside @dax_region.
 *
 * Called from size_store() with dax_region_rwsem and dax_dev_rwsem held for
 * write.
 *
 * Returns 0 on success or when @size already equals the current size,
 * -EBUSY while a driver is bound to the device, -ENOSPC when the requested
 * growth exceeds the free space of the region (or no usable gap is found),
 * -ENXIO (with a one-time warning) when the amount to grow by is misaligned,
 * or the error of the shrink / allocate / adjust helper that failed.
 *
 * Shrinking is delegated to dev_dax_shrink(). Growth is satisfied one gap at
 * a time: each scan of the region's child resources either allocates a new
 * range in the first free gap found or extends an existing allocation, and
 * the scan is restarted until nothing is left to allocate.
 */
1021 static ssize_t dev_dax_resize(struct dax_region *dax_region,
1022                 struct dev_dax *dev_dax, resource_size_t size)
1023 {
1024         resource_size_t avail = dax_region_avail_size(dax_region), to_alloc;
1025         resource_size_t dev_size = dev_dax_size(dev_dax);
1026         struct resource *region_res = &dax_region->res;
1027         struct device *dev = &dev_dax->dev;
1028         struct resource *res, *first;
1029         resource_size_t alloc = 0;
1030         int rc;
1031
1032         if (dev->driver)
1033                 return -EBUSY;
1034         if (size == dev_size)
1035                 return 0;
1036         if (size > dev_size && size - dev_size > avail)
1037                 return -ENOSPC;
1038         if (size < dev_size)
1039                 return dev_dax_shrink(dev_dax, size);
1040
1041         to_alloc = size - dev_size;
1042         if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc),
1043                         "resize of %pa misaligned\n", &to_alloc))
1044                 return -ENXIO;
1045
1046         /*
1047          * Expand the device into the unused portion of the region. This
1048          * may involve adjusting the end of an existing resource, or
1049          * allocating a new resource.
1050          */
1051 retry:
1052         first = region_res->child;
/* no child resources yet: allocate everything from the start of the region */
1053         if (!first)
1054                 return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc);
1055
/* -ENOSPC is what gets returned if the scan below finds no usable gap */
1056         rc = -ENOSPC;
1057         for (res = first; res; res = res->sibling) {
1058                 struct resource *next = res->sibling;
1059
1060                 /* space at the beginning of the region */
1061                 if (res == first && res->start > dax_region->res.start) {
1062                         alloc = min(res->start - dax_region->res.start, to_alloc);
1063                         rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc);
1064                         break;
1065                 }
1066
1067                 alloc = 0;
1068                 /* space between allocations */
1069                 if (next && next->start > res->end + 1)
1070                         alloc = min(next->start - (res->end + 1), to_alloc);
1071
1072                 /* space at the end of the region */
1073                 if (!alloc && !next && res->end < region_res->end)
1074                         alloc = min(region_res->end - res->end, to_alloc);
1075
1076                 if (!alloc)
1077                         continue;
1078
/* extend in place when @res is this device's last range, else add a new range after it */
1079                 if (adjust_ok(dev_dax, res)) {
1080                         rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc);
1081                         break;
1082                 }
1083                 rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc);
1084                 break;
1085         }
1086         if (rc)
1087                 return rc;
/* one gap consumed; rescan the (now changed) resource list for the remainder */
1088         to_alloc -= alloc;
1089         if (to_alloc)
1090                 goto retry;
1091         return 0;
1092 }
1093
1094 static ssize_t size_store(struct device *dev, struct device_attribute *attr,
1095                 const char *buf, size_t len)
1096 {
1097         ssize_t rc;
1098         unsigned long long val;
1099         struct dev_dax *dev_dax = to_dev_dax(dev);
1100         struct dax_region *dax_region = dev_dax->region;
1101
1102         rc = kstrtoull(buf, 0, &val);
1103         if (rc)
1104                 return rc;
1105
1106         if (!alloc_is_aligned(dev_dax, val)) {
1107                 dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val);
1108                 return -EINVAL;
1109         }
1110
1111         rc = down_write_killable(&dax_region_rwsem);
1112         if (rc)
1113                 return rc;
1114         if (!dax_region->dev->driver) {
1115                 rc = -ENXIO;
1116                 goto err_region;
1117         }
1118         rc = down_write_killable(&dax_dev_rwsem);
1119         if (rc)
1120                 goto err_dev;
1121
1122         rc = dev_dax_resize(dax_region, dev_dax, val);
1123
1124 err_dev:
1125         up_write(&dax_dev_rwsem);
1126 err_region:
1127         up_write(&dax_region_rwsem);
1128
1129         if (rc == 0)
1130                 return len;
1131         return rc;
1132 }
1133 static DEVICE_ATTR_RW(size);
1134
1135 static ssize_t range_parse(const char *opt, size_t len, struct range *range)
1136 {
1137         unsigned long long addr = 0;
1138         char *start, *end, *str;
1139         ssize_t rc = -EINVAL;
1140
1141         str = kstrdup(opt, GFP_KERNEL);
1142         if (!str)
1143                 return rc;
1144
1145         end = str;
1146         start = strsep(&end, "-");
1147         if (!start || !end)
1148                 goto err;
1149
1150         rc = kstrtoull(start, 16, &addr);
1151         if (rc)
1152                 goto err;
1153         range->start = addr;
1154
1155         rc = kstrtoull(end, 16, &addr);
1156         if (rc)
1157                 goto err;
1158         range->end = addr;
1159
1160 err:
1161         kfree(str);
1162         return rc;
1163 }
1164
1165 static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
1166                 const char *buf, size_t len)
1167 {
1168         struct dev_dax *dev_dax = to_dev_dax(dev);
1169         struct dax_region *dax_region = dev_dax->region;
1170         size_t to_alloc;
1171         struct range r;
1172         ssize_t rc;
1173
1174         rc = range_parse(buf, len, &r);
1175         if (rc)
1176                 return rc;
1177
1178         rc = down_write_killable(&dax_region_rwsem);
1179         if (rc)
1180                 return rc;
1181         if (!dax_region->dev->driver) {
1182                 up_write(&dax_region_rwsem);
1183                 return rc;
1184         }
1185         rc = down_write_killable(&dax_dev_rwsem);
1186         if (rc) {
1187                 up_write(&dax_region_rwsem);
1188                 return rc;
1189         }
1190
1191         to_alloc = range_len(&r);
1192         if (alloc_is_aligned(dev_dax, to_alloc))
1193                 rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
1194         up_write(&dax_dev_rwsem);
1195         up_write(&dax_region_rwsem);
1196
1197         return rc == 0 ? len : rc;
1198 }
1199 static DEVICE_ATTR_WO(mapping);
1200
1201 static ssize_t align_show(struct device *dev,
1202                 struct device_attribute *attr, char *buf)
1203 {
1204         struct dev_dax *dev_dax = to_dev_dax(dev);
1205
1206         return sysfs_emit(buf, "%d\n", dev_dax->align);
1207 }
1208
1209 static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax)
1210 {
1211         struct device *dev = &dev_dax->dev;
1212         int i;
1213
1214         for (i = 0; i < dev_dax->nr_range; i++) {
1215                 size_t len = range_len(&dev_dax->ranges[i].range);
1216
1217                 if (!alloc_is_aligned(dev_dax, len)) {
1218                         dev_dbg(dev, "%s: align %u invalid for range %d\n",
1219                                 __func__, dev_dax->align, i);
1220                         return -EINVAL;
1221                 }
1222         }
1223
1224         return 0;
1225 }
1226
1227 static ssize_t align_store(struct device *dev, struct device_attribute *attr,
1228                 const char *buf, size_t len)
1229 {
1230         struct dev_dax *dev_dax = to_dev_dax(dev);
1231         struct dax_region *dax_region = dev_dax->region;
1232         unsigned long val, align_save;
1233         ssize_t rc;
1234
1235         rc = kstrtoul(buf, 0, &val);
1236         if (rc)
1237                 return -ENXIO;
1238
1239         if (!dax_align_valid(val))
1240                 return -EINVAL;
1241
1242         rc = down_write_killable(&dax_region_rwsem);
1243         if (rc)
1244                 return rc;
1245         if (!dax_region->dev->driver) {
1246                 up_write(&dax_region_rwsem);
1247                 return -ENXIO;
1248         }
1249
1250         rc = down_write_killable(&dax_dev_rwsem);
1251         if (rc) {
1252                 up_write(&dax_region_rwsem);
1253                 return rc;
1254         }
1255         if (dev->driver) {
1256                 rc = -EBUSY;
1257                 goto out_unlock;
1258         }
1259
1260         align_save = dev_dax->align;
1261         dev_dax->align = val;
1262         rc = dev_dax_validate_align(dev_dax);
1263         if (rc)
1264                 dev_dax->align = align_save;
1265 out_unlock:
1266         up_write(&dax_dev_rwsem);
1267         up_write(&dax_region_rwsem);
1268         return rc == 0 ? len : rc;
1269 }
1270 static DEVICE_ATTR_RW(align);
1271
1272 static int dev_dax_target_node(struct dev_dax *dev_dax)
1273 {
1274         struct dax_region *dax_region = dev_dax->region;
1275
1276         return dax_region->target_node;
1277 }
1278
1279 static ssize_t target_node_show(struct device *dev,
1280                 struct device_attribute *attr, char *buf)
1281 {
1282         struct dev_dax *dev_dax = to_dev_dax(dev);
1283
1284         return sysfs_emit(buf, "%d\n", dev_dax_target_node(dev_dax));
1285 }
1286 static DEVICE_ATTR_RO(target_node);
1287
1288 static ssize_t resource_show(struct device *dev,
1289                 struct device_attribute *attr, char *buf)
1290 {
1291         struct dev_dax *dev_dax = to_dev_dax(dev);
1292         struct dax_region *dax_region = dev_dax->region;
1293         unsigned long long start;
1294
1295         if (dev_dax->nr_range < 1)
1296                 start = dax_region->res.start;
1297         else
1298                 start = dev_dax->ranges[0].range.start;
1299
1300         return sysfs_emit(buf, "%#llx\n", start);
1301 }
1302 static DEVICE_ATTR(resource, 0400, resource_show, NULL);
1303
1304 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
1305                 char *buf)
1306 {
1307         /*
1308          * We only ever expect to handle device-dax instances, i.e. the
1309          * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
1310          */
1311         return sysfs_emit(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
1312 }
1313 static DEVICE_ATTR_RO(modalias);
1314
1315 static ssize_t numa_node_show(struct device *dev,
1316                 struct device_attribute *attr, char *buf)
1317 {
1318         return sysfs_emit(buf, "%d\n", dev_to_node(dev));
1319 }
1320 static DEVICE_ATTR_RO(numa_node);
1321
1322 static ssize_t memmap_on_memory_show(struct device *dev,
1323                                      struct device_attribute *attr, char *buf)
1324 {
1325         struct dev_dax *dev_dax = to_dev_dax(dev);
1326
1327         return sysfs_emit(buf, "%d\n", dev_dax->memmap_on_memory);
1328 }
1329
1330 static ssize_t memmap_on_memory_store(struct device *dev,
1331                                       struct device_attribute *attr,
1332                                       const char *buf, size_t len)
1333 {
1334         struct dev_dax *dev_dax = to_dev_dax(dev);
1335         bool val;
1336         int rc;
1337
1338         rc = kstrtobool(buf, &val);
1339         if (rc)
1340                 return rc;
1341
1342         if (val == true && !mhp_supports_memmap_on_memory()) {
1343                 dev_dbg(dev, "memmap_on_memory is not available\n");
1344                 return -EOPNOTSUPP;
1345         }
1346
1347         rc = down_write_killable(&dax_dev_rwsem);
1348         if (rc)
1349                 return rc;
1350
1351         if (dev_dax->memmap_on_memory != val && dev->driver &&
1352             to_dax_drv(dev->driver)->type == DAXDRV_KMEM_TYPE) {
1353                 up_write(&dax_dev_rwsem);
1354                 return -EBUSY;
1355         }
1356
1357         dev_dax->memmap_on_memory = val;
1358         up_write(&dax_dev_rwsem);
1359
1360         return len;
1361 }
1362 static DEVICE_ATTR_RW(memmap_on_memory);
1363
1364 static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
1365 {
1366         struct device *dev = container_of(kobj, struct device, kobj);
1367         struct dev_dax *dev_dax = to_dev_dax(dev);
1368         struct dax_region *dax_region = dev_dax->region;
1369
1370         if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
1371                 return 0;
1372         if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
1373                 return 0;
1374         if (a == &dev_attr_mapping.attr && is_static(dax_region))
1375                 return 0;
1376         if ((a == &dev_attr_align.attr ||
1377              a == &dev_attr_size.attr) && is_static(dax_region))
1378                 return 0444;
1379         return a->mode;
1380 }
1381
/*
 * NULL-terminated list of the sysfs attributes of a dev_dax device. Which of
 * them are actually exposed, and with what mode, is decided per device by
 * dev_dax_visible().
 */
1382 static struct attribute *dev_dax_attributes[] = {
1383         &dev_attr_modalias.attr,
1384         &dev_attr_size.attr,
1385         &dev_attr_mapping.attr,
1386         &dev_attr_target_node.attr,
1387         &dev_attr_align.attr,
1388         &dev_attr_resource.attr,
1389         &dev_attr_numa_node.attr,
1390         &dev_attr_memmap_on_memory.attr,
1391         NULL,
1392 };
1393
/* Attribute group pairing dev_dax_attributes with the dev_dax_visible() filter. */
1394 static const struct attribute_group dev_dax_attribute_group = {
1395         .attrs = dev_dax_attributes,
1396         .is_visible = dev_dax_visible,
1397 };
1398
/* NULL-terminated group list referenced by dev_dax_type.groups. */
1399 static const struct attribute_group *dax_attribute_groups[] = {
1400         &dev_dax_attribute_group,
1401         NULL,
1402 };
1403
1404 static void dev_dax_release(struct device *dev)
1405 {
1406         struct dev_dax *dev_dax = to_dev_dax(dev);
1407         struct dax_device *dax_dev = dev_dax->dax_dev;
1408
1409         put_dax(dax_dev);
1410         free_dev_dax_id(dev_dax);
1411         kfree(dev_dax->pgmap);
1412         kfree(dev_dax);
1413 }
1414
/*
 * Device type assigned to every dev_dax in __devm_create_dev_dax(): supplies
 * the release callback and the sysfs attribute groups.
 */
1415 static const struct device_type dev_dax_type = {
1416         .release = dev_dax_release,
1417         .groups = dax_attribute_groups,
1418 };
1419
/*
 * __devm_create_dev_dax() - allocate, set up and register a dev_dax described
 * by @data as a child of the region device.
 *
 * Static regions use the caller-supplied id, which must be >= 0; dynamic
 * regions require a negative id and get one from alloc_dev_dax_id(). A
 * mismatch triggers a one-time warning and -EINVAL. An initial range of
 * data->size bytes is requested at the start of the region, data->pgmap (if
 * any) is duplicated, and a dax_device is allocated and immediately killed
 * because the instance is considered dead until a driver attaches.
 *
 * After device_add() succeeds, teardown is tied to the region device through
 * a devm action (unregister_dev_dax), and a mapping device is registered for
 * the initial range when that range is non-empty.
 *
 * devm_create_dev_dax() calls this with dax_region_rwsem held for write.
 *
 * Returns the new dev_dax, or an ERR_PTR() on failure.
 */
1420 static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data)
1421 {
1422         struct dax_region *dax_region = data->dax_region;
1423         struct device *parent = dax_region->dev;
1424         struct dax_device *dax_dev;
1425         struct dev_dax *dev_dax;
1426         struct inode *inode;
1427         struct device *dev;
1428         int rc;
1429
1430         dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
1431         if (!dev_dax)
1432                 return ERR_PTR(-ENOMEM);
1433
1434         dev_dax->region = dax_region;
1435         if (is_static(dax_region)) {
1436                 if (dev_WARN_ONCE(parent, data->id < 0,
1437                                 "dynamic id specified to static region\n")) {
1438                         rc = -EINVAL;
1439                         goto err_id;
1440                 }
1441
1442                 dev_dax->id = data->id;
1443         } else {
1444                 if (dev_WARN_ONCE(parent, data->id >= 0,
1445                                 "static id specified to dynamic region\n")) {
1446                         rc = -EINVAL;
1447                         goto err_id;
1448                 }
1449
1450                 rc = alloc_dev_dax_id(dev_dax);
1451                 if (rc < 0)
1452                         goto err_id;
1453         }
1454
/*
 * NOTE(review): from here on the error labels at the bottom unwind with a
 * plain kfree(dev_dax) even though device_initialize()/dev_set_name() have
 * run; confirm that nothing allocated by dev_set_name() is leaked on those
 * paths.
 */
1455         dev = &dev_dax->dev;
1456         device_initialize(dev);
1457         dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);
1458
1459         rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size);
1460         if (rc)
1461                 goto err_range;
1462
1463         if (data->pgmap) {
1464                 dev_WARN_ONCE(parent, !is_static(dax_region),
1465                         "custom dev_pagemap requires a static dax_region\n");
1466
1467                 dev_dax->pgmap = kmemdup(data->pgmap,
1468                                 sizeof(struct dev_pagemap), GFP_KERNEL);
1469                 if (!dev_dax->pgmap) {
1470                         rc = -ENOMEM;
1471                         goto err_pgmap;
1472                 }
1473         }
1474
1475         /*
1476          * No dax_operations since there is no access to this device outside of
1477          * mmap of the resulting character device.
1478          */
1479         dax_dev = alloc_dax(dev_dax, NULL);
1480         if (IS_ERR(dax_dev)) {
1481                 rc = PTR_ERR(dax_dev);
1482                 goto err_alloc_dax;
1483         }
1484         set_dax_synchronous(dax_dev);
1485         set_dax_nocache(dax_dev);
1486         set_dax_nomc(dax_dev);
1487
1488         /* a device_dax instance is dead while the driver is not attached */
1489         kill_dax(dax_dev);
1490
1491         dev_dax->dax_dev = dax_dev;
1492         dev_dax->target_node = dax_region->target_node;
1493         dev_dax->align = dax_region->align;
1494         ida_init(&dev_dax->ida);
1495
1496         dev_dax->memmap_on_memory = data->memmap_on_memory;
1497
1498         inode = dax_inode(dax_dev);
1499         dev->devt = inode->i_rdev;
1500         dev->bus = &dax_bus_type;
1501         dev->parent = parent;
1502         dev->type = &dev_dax_type;
1503
/*
 * dev->type is set by now, so on failure the final put_device() is what
 * frees everything through dev_dax_release() rather than the labels below.
 */
1504         rc = device_add(dev);
1505         if (rc) {
1506                 kill_dev_dax(dev_dax);
1507                 put_device(dev);
1508                 return ERR_PTR(rc);
1509         }
1510
/*
 * NOTE(review): no explicit unwind on failure here; this presumably relies on
 * devm_add_action_or_reset() invoking unregister_dev_dax() itself - confirm.
 */
1511         rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
1512         if (rc)
1513                 return ERR_PTR(rc);
1514
1515         /* register mapping device for the initial allocation range */
1516         if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) {
1517                 rc = devm_register_dax_mapping(dev_dax, 0);
1518                 if (rc)
1519                         return ERR_PTR(rc);
1520         }
1521
1522         return dev_dax;
1523
/* unwind in reverse order of setup; each label undoes the steps before it */
1524 err_alloc_dax:
1525         kfree(dev_dax->pgmap);
1526 err_pgmap:
1527         free_dev_dax_ranges(dev_dax);
1528 err_range:
1529         free_dev_dax_id(dev_dax);
1530 err_id:
1531         kfree(dev_dax);
1532
1533         return ERR_PTR(rc);
1534 }
1535
1536 struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
1537 {
1538         struct dev_dax *dev_dax;
1539
1540         down_write(&dax_region_rwsem);
1541         dev_dax = __devm_create_dev_dax(data);
1542         up_write(&dax_region_rwsem);
1543
1544         return dev_dax;
1545 }
1546 EXPORT_SYMBOL_GPL(devm_create_dev_dax);
1547
1548 int __dax_driver_register(struct dax_device_driver *dax_drv,
1549                 struct module *module, const char *mod_name)
1550 {
1551         struct device_driver *drv = &dax_drv->drv;
1552
1553         /*
1554          * dax_bus_probe() calls dax_drv->probe() unconditionally.
1555          * So better be safe than sorry and ensure it is provided.
1556          */
1557         if (!dax_drv->probe)
1558                 return -EINVAL;
1559
1560         INIT_LIST_HEAD(&dax_drv->ids);
1561         drv->owner = module;
1562         drv->name = mod_name;
1563         drv->mod_name = mod_name;
1564         drv->bus = &dax_bus_type;
1565
1566         return driver_register(drv);
1567 }
1568 EXPORT_SYMBOL_GPL(__dax_driver_register);
1569
1570 void dax_driver_unregister(struct dax_device_driver *dax_drv)
1571 {
1572         struct device_driver *drv = &dax_drv->drv;
1573         struct dax_id *dax_id, *_id;
1574
1575         mutex_lock(&dax_bus_lock);
1576         list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
1577                 list_del(&dax_id->list);
1578                 kfree(dax_id);
1579         }
1580         mutex_unlock(&dax_bus_lock);
1581         driver_unregister(drv);
1582 }
1583 EXPORT_SYMBOL_GPL(dax_driver_unregister);
1584
1585 int __init dax_bus_init(void)
1586 {
1587         return bus_register(&dax_bus_type);
1588 }
1589
1590 void __exit dax_bus_exit(void)
1591 {
1592         bus_unregister(&dax_bus_type);
1593 }
This page took 0.114345 seconds and 4 git commands to generate.