]>
Commit | Line | Data |
---|---|---|
ab68f262 | 1 | /* |
7b6be844 | 2 | * Copyright(c) 2016 - 2017 Intel Corporation. All rights reserved. |
ab68f262 DW |
3 | * |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of version 2 of the GNU General Public License as | |
6 | * published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, but | |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | */ | |
13 | #include <linux/pagemap.h> | |
14 | #include <linux/module.h> | |
15 | #include <linux/device.h> | |
16 | #include <linux/pfn_t.h> | |
ba09c01d | 17 | #include <linux/cdev.h> |
ab68f262 DW |
18 | #include <linux/slab.h> |
19 | #include <linux/dax.h> | |
20 | #include <linux/fs.h> | |
21 | #include <linux/mm.h> | |
ef842302 | 22 | #include <linux/mman.h> |
efebc711 | 23 | #include "dax-private.h" |
ccdb07f6 | 24 | #include "dax.h" |
ab68f262 | 25 | |
ab68f262 | 26 | static struct class *dax_class; |
ab68f262 | 27 | |
565851c9 DW |
28 | /* |
29 | * Rely on the fact that drvdata is set before the attributes are | |
30 | * registered, and that the attributes are unregistered before drvdata | |
31 | * is cleared to assume that drvdata is always valid. | |
32 | */ | |
d7fe1a67 DW |
33 | static ssize_t id_show(struct device *dev, |
34 | struct device_attribute *attr, char *buf) | |
35 | { | |
565851c9 | 36 | struct dax_region *dax_region = dev_get_drvdata(dev); |
d7fe1a67 | 37 | |
565851c9 | 38 | return sprintf(buf, "%d\n", dax_region->id); |
d7fe1a67 DW |
39 | } |
40 | static DEVICE_ATTR_RO(id); | |
41 | ||
42 | static ssize_t region_size_show(struct device *dev, | |
43 | struct device_attribute *attr, char *buf) | |
44 | { | |
565851c9 | 45 | struct dax_region *dax_region = dev_get_drvdata(dev); |
d7fe1a67 | 46 | |
565851c9 DW |
47 | return sprintf(buf, "%llu\n", (unsigned long long) |
48 | resource_size(&dax_region->res)); | |
d7fe1a67 DW |
49 | } |
50 | static struct device_attribute dev_attr_region_size = __ATTR(size, 0444, | |
51 | region_size_show, NULL); | |
52 | ||
53 | static ssize_t align_show(struct device *dev, | |
54 | struct device_attribute *attr, char *buf) | |
55 | { | |
565851c9 | 56 | struct dax_region *dax_region = dev_get_drvdata(dev); |
d7fe1a67 | 57 | |
565851c9 | 58 | return sprintf(buf, "%u\n", dax_region->align); |
d7fe1a67 DW |
59 | } |
60 | static DEVICE_ATTR_RO(align); | |
61 | ||
62 | static struct attribute *dax_region_attributes[] = { | |
63 | &dev_attr_region_size.attr, | |
64 | &dev_attr_align.attr, | |
65 | &dev_attr_id.attr, | |
66 | NULL, | |
67 | }; | |
68 | ||
69 | static const struct attribute_group dax_region_attribute_group = { | |
70 | .name = "dax_region", | |
71 | .attrs = dax_region_attributes, | |
72 | }; | |
73 | ||
74 | static const struct attribute_group *dax_region_attribute_groups[] = { | |
75 | &dax_region_attribute_group, | |
76 | NULL, | |
77 | }; | |
78 | ||
ab68f262 DW |
79 | static void dax_region_free(struct kref *kref) |
80 | { | |
81 | struct dax_region *dax_region; | |
82 | ||
83 | dax_region = container_of(kref, struct dax_region, kref); | |
84 | kfree(dax_region); | |
85 | } | |
86 | ||
87 | void dax_region_put(struct dax_region *dax_region) | |
ab68f262 | 88 | { |
ab68f262 | 89 | kref_put(&dax_region->kref, dax_region_free); |
ab68f262 | 90 | } |
ab68f262 | 91 | EXPORT_SYMBOL_GPL(dax_region_put); |
ab68f262 | 92 | |
d7fe1a67 DW |
93 | static void dax_region_unregister(void *region) |
94 | { | |
95 | struct dax_region *dax_region = region; | |
96 | ||
97 | sysfs_remove_groups(&dax_region->dev->kobj, | |
98 | dax_region_attribute_groups); | |
99 | dax_region_put(dax_region); | |
100 | } | |
101 | ||
ab68f262 DW |
102 | struct dax_region *alloc_dax_region(struct device *parent, int region_id, |
103 | struct resource *res, unsigned int align, void *addr, | |
104 | unsigned long pfn_flags) | |
105 | { | |
106 | struct dax_region *dax_region; | |
107 | ||
d7fe1a67 DW |
108 | /* |
109 | * The DAX core assumes that it can store its private data in | |
110 | * parent->driver_data. This WARN is a reminder / safeguard for | |
111 | * developers of device-dax drivers. | |
112 | */ | |
113 | if (dev_get_drvdata(parent)) { | |
114 | dev_WARN(parent, "dax core failed to setup private data\n"); | |
115 | return NULL; | |
116 | } | |
117 | ||
9d2d01a0 DW |
118 | if (!IS_ALIGNED(res->start, align) |
119 | || !IS_ALIGNED(resource_size(res), align)) | |
120 | return NULL; | |
ab68f262 | 121 | |
9d2d01a0 | 122 | dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL); |
ab68f262 DW |
123 | if (!dax_region) |
124 | return NULL; | |
125 | ||
d7fe1a67 | 126 | dev_set_drvdata(parent, dax_region); |
ab68f262 DW |
127 | memcpy(&dax_region->res, res, sizeof(*res)); |
128 | dax_region->pfn_flags = pfn_flags; | |
129 | kref_init(&dax_region->kref); | |
130 | dax_region->id = region_id; | |
131 | ida_init(&dax_region->ida); | |
132 | dax_region->align = align; | |
133 | dax_region->dev = parent; | |
134 | dax_region->base = addr; | |
d7fe1a67 DW |
135 | if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) { |
136 | kfree(dax_region); | |
1c47a645 | 137 | return NULL; |
d7fe1a67 | 138 | } |
ab68f262 | 139 | |
d7fe1a67 DW |
140 | kref_get(&dax_region->kref); |
141 | if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region)) | |
142 | return NULL; | |
ab68f262 DW |
143 | return dax_region; |
144 | } | |
145 | EXPORT_SYMBOL_GPL(alloc_dax_region); | |
146 | ||
5f0694b3 | 147 | static struct dev_dax *to_dev_dax(struct device *dev) |
ebd84d72 | 148 | { |
5f0694b3 | 149 | return container_of(dev, struct dev_dax, dev); |
ebd84d72 DW |
150 | } |
151 | ||
ab68f262 DW |
152 | static ssize_t size_show(struct device *dev, |
153 | struct device_attribute *attr, char *buf) | |
154 | { | |
5f0694b3 | 155 | struct dev_dax *dev_dax = to_dev_dax(dev); |
ab68f262 DW |
156 | unsigned long long size = 0; |
157 | int i; | |
158 | ||
5f0694b3 DW |
159 | for (i = 0; i < dev_dax->num_resources; i++) |
160 | size += resource_size(&dev_dax->res[i]); | |
ab68f262 DW |
161 | |
162 | return sprintf(buf, "%llu\n", size); | |
163 | } | |
164 | static DEVICE_ATTR_RO(size); | |
165 | ||
5f0694b3 | 166 | static struct attribute *dev_dax_attributes[] = { |
ab68f262 DW |
167 | &dev_attr_size.attr, |
168 | NULL, | |
169 | }; | |
170 | ||
5f0694b3 DW |
171 | static const struct attribute_group dev_dax_attribute_group = { |
172 | .attrs = dev_dax_attributes, | |
ab68f262 DW |
173 | }; |
174 | ||
175 | static const struct attribute_group *dax_attribute_groups[] = { | |
5f0694b3 | 176 | &dev_dax_attribute_group, |
ab68f262 DW |
177 | NULL, |
178 | }; | |
179 | ||
5f0694b3 | 180 | static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, |
dee41079 DW |
181 | const char *func) |
182 | { | |
5f0694b3 DW |
183 | struct dax_region *dax_region = dev_dax->region; |
184 | struct device *dev = &dev_dax->dev; | |
dee41079 DW |
185 | unsigned long mask; |
186 | ||
7b6be844 | 187 | if (!dax_alive(dev_dax->dax_dev)) |
dee41079 DW |
188 | return -ENXIO; |
189 | ||
4cb19355 | 190 | /* prevent private mappings from being established */ |
325896ff | 191 | if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) { |
5a14e91d JM |
192 | dev_info_ratelimited(dev, |
193 | "%s: %s: fail, attempted private mapping\n", | |
dee41079 DW |
194 | current->comm, func); |
195 | return -EINVAL; | |
196 | } | |
197 | ||
198 | mask = dax_region->align - 1; | |
199 | if (vma->vm_start & mask || vma->vm_end & mask) { | |
5a14e91d JM |
200 | dev_info_ratelimited(dev, |
201 | "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n", | |
dee41079 DW |
202 | current->comm, func, vma->vm_start, vma->vm_end, |
203 | mask); | |
204 | return -EINVAL; | |
205 | } | |
206 | ||
207 | if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV | |
208 | && (vma->vm_flags & VM_DONTCOPY) == 0) { | |
5a14e91d JM |
209 | dev_info_ratelimited(dev, |
210 | "%s: %s: fail, dax range requires MADV_DONTFORK\n", | |
dee41079 DW |
211 | current->comm, func); |
212 | return -EINVAL; | |
213 | } | |
214 | ||
215 | if (!vma_is_dax(vma)) { | |
5a14e91d JM |
216 | dev_info_ratelimited(dev, |
217 | "%s: %s: fail, vma is not DAX capable\n", | |
dee41079 DW |
218 | current->comm, func); |
219 | return -EINVAL; | |
220 | } | |
221 | ||
222 | return 0; | |
223 | } | |
224 | ||
efebc711 | 225 | /* see "strong" declaration in tools/testing/nvdimm/dax-dev.c */ |
73616367 | 226 | __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, |
dee41079 DW |
227 | unsigned long size) |
228 | { | |
229 | struct resource *res; | |
0a3ff786 RZ |
230 | /* gcc-4.6.3-nolibc for i386 complains that this is uninitialized */ |
231 | phys_addr_t uninitialized_var(phys); | |
dee41079 DW |
232 | int i; |
233 | ||
5f0694b3 DW |
234 | for (i = 0; i < dev_dax->num_resources; i++) { |
235 | res = &dev_dax->res[i]; | |
dee41079 DW |
236 | phys = pgoff * PAGE_SIZE + res->start; |
237 | if (phys >= res->start && phys <= res->end) | |
238 | break; | |
239 | pgoff -= PHYS_PFN(resource_size(res)); | |
240 | } | |
241 | ||
5f0694b3 DW |
242 | if (i < dev_dax->num_resources) { |
243 | res = &dev_dax->res[i]; | |
dee41079 DW |
244 | if (phys + size - 1 <= res->end) |
245 | return phys; | |
246 | } | |
247 | ||
248 | return -1; | |
249 | } | |
250 | ||
226ab561 | 251 | static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax, |
2232c638 | 252 | struct vm_fault *vmf, pfn_t *pfn) |
dee41079 | 253 | { |
5f0694b3 | 254 | struct device *dev = &dev_dax->dev; |
dee41079 | 255 | struct dax_region *dax_region; |
dee41079 | 256 | phys_addr_t phys; |
0134ed4f | 257 | unsigned int fault_size = PAGE_SIZE; |
dee41079 | 258 | |
5f0694b3 | 259 | if (check_vma(dev_dax, vmf->vma, __func__)) |
dee41079 DW |
260 | return VM_FAULT_SIGBUS; |
261 | ||
5f0694b3 | 262 | dax_region = dev_dax->region; |
dee41079 | 263 | if (dax_region->align > PAGE_SIZE) { |
6daaca52 DW |
264 | dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n", |
265 | dax_region->align, fault_size); | |
dee41079 DW |
266 | return VM_FAULT_SIGBUS; |
267 | } | |
268 | ||
0134ed4f DJ |
269 | if (fault_size != dax_region->align) |
270 | return VM_FAULT_SIGBUS; | |
271 | ||
73616367 | 272 | phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE); |
dee41079 | 273 | if (phys == -1) { |
6daaca52 | 274 | dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", vmf->pgoff); |
dee41079 DW |
275 | return VM_FAULT_SIGBUS; |
276 | } | |
277 | ||
2232c638 | 278 | *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); |
dee41079 | 279 | |
2232c638 | 280 | return vmf_insert_mixed(vmf->vma, vmf->address, *pfn); |
dee41079 DW |
281 | } |
282 | ||
226ab561 | 283 | static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax, |
2232c638 | 284 | struct vm_fault *vmf, pfn_t *pfn) |
dee41079 | 285 | { |
d8a849e1 | 286 | unsigned long pmd_addr = vmf->address & PMD_MASK; |
5f0694b3 | 287 | struct device *dev = &dev_dax->dev; |
dee41079 DW |
288 | struct dax_region *dax_region; |
289 | phys_addr_t phys; | |
290 | pgoff_t pgoff; | |
0134ed4f | 291 | unsigned int fault_size = PMD_SIZE; |
dee41079 | 292 | |
5f0694b3 | 293 | if (check_vma(dev_dax, vmf->vma, __func__)) |
dee41079 DW |
294 | return VM_FAULT_SIGBUS; |
295 | ||
5f0694b3 | 296 | dax_region = dev_dax->region; |
dee41079 | 297 | if (dax_region->align > PMD_SIZE) { |
6daaca52 DW |
298 | dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n", |
299 | dax_region->align, fault_size); | |
dee41079 DW |
300 | return VM_FAULT_SIGBUS; |
301 | } | |
302 | ||
303 | /* dax pmd mappings require pfn_t_devmap() */ | |
304 | if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) { | |
6daaca52 | 305 | dev_dbg(dev, "region lacks devmap flags\n"); |
dee41079 DW |
306 | return VM_FAULT_SIGBUS; |
307 | } | |
308 | ||
0134ed4f DJ |
309 | if (fault_size < dax_region->align) |
310 | return VM_FAULT_SIGBUS; | |
311 | else if (fault_size > dax_region->align) | |
312 | return VM_FAULT_FALLBACK; | |
313 | ||
314 | /* if we are outside of the VMA */ | |
315 | if (pmd_addr < vmf->vma->vm_start || | |
316 | (pmd_addr + PMD_SIZE) > vmf->vma->vm_end) | |
317 | return VM_FAULT_SIGBUS; | |
318 | ||
f4200391 | 319 | pgoff = linear_page_index(vmf->vma, pmd_addr); |
73616367 | 320 | phys = dax_pgoff_to_phys(dev_dax, pgoff, PMD_SIZE); |
dee41079 | 321 | if (phys == -1) { |
6daaca52 | 322 | dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff); |
dee41079 DW |
323 | return VM_FAULT_SIGBUS; |
324 | } | |
325 | ||
2232c638 | 326 | *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); |
dee41079 | 327 | |
2232c638 | 328 | return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, *pfn, |
d8a849e1 | 329 | vmf->flags & FAULT_FLAG_WRITE); |
dee41079 DW |
330 | } |
331 | ||
9557feee | 332 | #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD |
226ab561 | 333 | static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, |
2232c638 | 334 | struct vm_fault *vmf, pfn_t *pfn) |
9557feee DJ |
335 | { |
336 | unsigned long pud_addr = vmf->address & PUD_MASK; | |
5f0694b3 | 337 | struct device *dev = &dev_dax->dev; |
9557feee DJ |
338 | struct dax_region *dax_region; |
339 | phys_addr_t phys; | |
340 | pgoff_t pgoff; | |
70b085b0 DJ |
341 | unsigned int fault_size = PUD_SIZE; |
342 | ||
9557feee | 343 | |
5f0694b3 | 344 | if (check_vma(dev_dax, vmf->vma, __func__)) |
9557feee DJ |
345 | return VM_FAULT_SIGBUS; |
346 | ||
5f0694b3 | 347 | dax_region = dev_dax->region; |
9557feee | 348 | if (dax_region->align > PUD_SIZE) { |
6daaca52 DW |
349 | dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n", |
350 | dax_region->align, fault_size); | |
9557feee DJ |
351 | return VM_FAULT_SIGBUS; |
352 | } | |
353 | ||
354 | /* dax pud mappings require pfn_t_devmap() */ | |
355 | if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) { | |
6daaca52 | 356 | dev_dbg(dev, "region lacks devmap flags\n"); |
9557feee DJ |
357 | return VM_FAULT_SIGBUS; |
358 | } | |
359 | ||
70b085b0 DJ |
360 | if (fault_size < dax_region->align) |
361 | return VM_FAULT_SIGBUS; | |
362 | else if (fault_size > dax_region->align) | |
363 | return VM_FAULT_FALLBACK; | |
364 | ||
365 | /* if we are outside of the VMA */ | |
366 | if (pud_addr < vmf->vma->vm_start || | |
367 | (pud_addr + PUD_SIZE) > vmf->vma->vm_end) | |
368 | return VM_FAULT_SIGBUS; | |
369 | ||
9557feee | 370 | pgoff = linear_page_index(vmf->vma, pud_addr); |
73616367 | 371 | phys = dax_pgoff_to_phys(dev_dax, pgoff, PUD_SIZE); |
9557feee | 372 | if (phys == -1) { |
6daaca52 | 373 | dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff); |
9557feee DJ |
374 | return VM_FAULT_SIGBUS; |
375 | } | |
376 | ||
2232c638 | 377 | *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); |
9557feee | 378 | |
2232c638 | 379 | return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, *pfn, |
9557feee DJ |
380 | vmf->flags & FAULT_FLAG_WRITE); |
381 | } | |
382 | #else | |
226ab561 | 383 | static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax, |
2232c638 | 384 | struct vm_fault *vmf, pfn_t *pfn) |
9557feee DJ |
385 | { |
386 | return VM_FAULT_FALLBACK; | |
387 | } | |
388 | #endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ | |
389 | ||
226ab561 | 390 | static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, |
c791ace1 | 391 | enum page_entry_size pe_size) |
dee41079 | 392 | { |
f4200391 | 393 | struct file *filp = vmf->vma->vm_file; |
2232c638 | 394 | unsigned long fault_size; |
36bdac1e SJ |
395 | vm_fault_t rc = VM_FAULT_SIGBUS; |
396 | int id; | |
2232c638 | 397 | pfn_t pfn; |
5f0694b3 | 398 | struct dev_dax *dev_dax = filp->private_data; |
dee41079 | 399 | |
6daaca52 DW |
400 | dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm, |
401 | (vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read", | |
76202620 | 402 | vmf->vma->vm_start, vmf->vma->vm_end, pe_size); |
dee41079 | 403 | |
7b6be844 | 404 | id = dax_read_lock(); |
c791ace1 DJ |
405 | switch (pe_size) { |
406 | case PE_SIZE_PTE: | |
2232c638 DW |
407 | fault_size = PAGE_SIZE; |
408 | rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn); | |
a2d58167 | 409 | break; |
c791ace1 | 410 | case PE_SIZE_PMD: |
2232c638 DW |
411 | fault_size = PMD_SIZE; |
412 | rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn); | |
9557feee | 413 | break; |
c791ace1 | 414 | case PE_SIZE_PUD: |
2232c638 DW |
415 | fault_size = PUD_SIZE; |
416 | rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn); | |
a2d58167 DJ |
417 | break; |
418 | default: | |
54eafcc9 | 419 | rc = VM_FAULT_SIGBUS; |
a2d58167 | 420 | } |
2232c638 DW |
421 | |
422 | if (rc == VM_FAULT_NOPAGE) { | |
423 | unsigned long i; | |
35de2995 | 424 | pgoff_t pgoff; |
2232c638 DW |
425 | |
426 | /* | |
427 | * In the device-dax case the only possibility for a | |
428 | * VM_FAULT_NOPAGE result is when device-dax capacity is | |
429 | * mapped. No need to consider the zero page, or racing | |
430 | * conflicting mappings. | |
431 | */ | |
35de2995 DW |
432 | pgoff = linear_page_index(vmf->vma, vmf->address |
433 | & ~(fault_size - 1)); | |
2232c638 DW |
434 | for (i = 0; i < fault_size / PAGE_SIZE; i++) { |
435 | struct page *page; | |
436 | ||
437 | page = pfn_to_page(pfn_t_to_pfn(pfn) + i); | |
438 | if (page->mapping) | |
439 | continue; | |
440 | page->mapping = filp->f_mapping; | |
35de2995 | 441 | page->index = pgoff + i; |
2232c638 DW |
442 | } |
443 | } | |
7b6be844 | 444 | dax_read_unlock(id); |
dee41079 DW |
445 | |
446 | return rc; | |
447 | } | |
448 | ||
226ab561 | 449 | static vm_fault_t dev_dax_fault(struct vm_fault *vmf) |
c791ace1 | 450 | { |
5f0694b3 | 451 | return dev_dax_huge_fault(vmf, PE_SIZE_PTE); |
c791ace1 DJ |
452 | } |
453 | ||
9702cffd DW |
454 | static int dev_dax_split(struct vm_area_struct *vma, unsigned long addr) |
455 | { | |
456 | struct file *filp = vma->vm_file; | |
457 | struct dev_dax *dev_dax = filp->private_data; | |
458 | struct dax_region *dax_region = dev_dax->region; | |
459 | ||
460 | if (!IS_ALIGNED(addr, dax_region->align)) | |
461 | return -EINVAL; | |
462 | return 0; | |
463 | } | |
464 | ||
c1d53b92 DW |
465 | static unsigned long dev_dax_pagesize(struct vm_area_struct *vma) |
466 | { | |
467 | struct file *filp = vma->vm_file; | |
468 | struct dev_dax *dev_dax = filp->private_data; | |
469 | struct dax_region *dax_region = dev_dax->region; | |
470 | ||
471 | return dax_region->align; | |
472 | } | |
473 | ||
5f0694b3 DW |
474 | static const struct vm_operations_struct dax_vm_ops = { |
475 | .fault = dev_dax_fault, | |
476 | .huge_fault = dev_dax_huge_fault, | |
9702cffd | 477 | .split = dev_dax_split, |
c1d53b92 | 478 | .pagesize = dev_dax_pagesize, |
dee41079 DW |
479 | }; |
480 | ||
af69f51e | 481 | static int dax_mmap(struct file *filp, struct vm_area_struct *vma) |
dee41079 | 482 | { |
5f0694b3 | 483 | struct dev_dax *dev_dax = filp->private_data; |
7b6be844 | 484 | int rc, id; |
dee41079 | 485 | |
6daaca52 | 486 | dev_dbg(&dev_dax->dev, "trace\n"); |
dee41079 | 487 | |
7b6be844 DW |
488 | /* |
489 | * We lock to check dax_dev liveness and will re-check at | |
490 | * fault time. | |
491 | */ | |
492 | id = dax_read_lock(); | |
5f0694b3 | 493 | rc = check_vma(dev_dax, vma, __func__); |
7b6be844 | 494 | dax_read_unlock(id); |
dee41079 DW |
495 | if (rc) |
496 | return rc; | |
497 | ||
5f0694b3 | 498 | vma->vm_ops = &dax_vm_ops; |
e1fb4a08 | 499 | vma->vm_flags |= VM_HUGEPAGE; |
dee41079 | 500 | return 0; |
043a9255 DW |
501 | } |
502 | ||
503 | /* return an unmapped area aligned to the dax region specified alignment */ | |
af69f51e | 504 | static unsigned long dax_get_unmapped_area(struct file *filp, |
043a9255 DW |
505 | unsigned long addr, unsigned long len, unsigned long pgoff, |
506 | unsigned long flags) | |
507 | { | |
508 | unsigned long off, off_end, off_align, len_align, addr_align, align; | |
5f0694b3 | 509 | struct dev_dax *dev_dax = filp ? filp->private_data : NULL; |
043a9255 DW |
510 | struct dax_region *dax_region; |
511 | ||
5f0694b3 | 512 | if (!dev_dax || addr) |
043a9255 DW |
513 | goto out; |
514 | ||
5f0694b3 | 515 | dax_region = dev_dax->region; |
043a9255 DW |
516 | align = dax_region->align; |
517 | off = pgoff << PAGE_SHIFT; | |
518 | off_end = off + len; | |
519 | off_align = round_up(off, align); | |
520 | ||
521 | if ((off_end <= off_align) || ((off_end - off_align) < align)) | |
522 | goto out; | |
523 | ||
524 | len_align = len + align; | |
525 | if ((off + len_align) < off) | |
526 | goto out; | |
dee41079 | 527 | |
043a9255 DW |
528 | addr_align = current->mm->get_unmapped_area(filp, addr, len_align, |
529 | pgoff, flags); | |
530 | if (!IS_ERR_VALUE(addr_align)) { | |
531 | addr_align += (off - addr_align) & (align - 1); | |
532 | return addr_align; | |
533 | } | |
534 | out: | |
535 | return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); | |
536 | } | |
537 | ||
41c9b1be DJ |
538 | static const struct address_space_operations dev_dax_aops = { |
539 | .set_page_dirty = noop_set_page_dirty, | |
540 | .invalidatepage = noop_invalidatepage, | |
541 | }; | |
542 | ||
af69f51e | 543 | static int dax_open(struct inode *inode, struct file *filp) |
043a9255 | 544 | { |
7b6be844 DW |
545 | struct dax_device *dax_dev = inode_dax(inode); |
546 | struct inode *__dax_inode = dax_inode(dax_dev); | |
547 | struct dev_dax *dev_dax = dax_get_private(dax_dev); | |
043a9255 | 548 | |
6daaca52 | 549 | dev_dbg(&dev_dax->dev, "trace\n"); |
7b6be844 DW |
550 | inode->i_mapping = __dax_inode->i_mapping; |
551 | inode->i_mapping->host = __dax_inode; | |
41c9b1be | 552 | inode->i_mapping->a_ops = &dev_dax_aops; |
3bc52c45 | 553 | filp->f_mapping = inode->i_mapping; |
5660e13d | 554 | filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); |
5f0694b3 | 555 | filp->private_data = dev_dax; |
ebd84d72 | 556 | inode->i_flags = S_DAX; |
043a9255 | 557 | |
043a9255 DW |
558 | return 0; |
559 | } | |
dee41079 | 560 | |
af69f51e | 561 | static int dax_release(struct inode *inode, struct file *filp) |
043a9255 | 562 | { |
5f0694b3 | 563 | struct dev_dax *dev_dax = filp->private_data; |
043a9255 | 564 | |
6daaca52 | 565 | dev_dbg(&dev_dax->dev, "trace\n"); |
043a9255 | 566 | return 0; |
dee41079 DW |
567 | } |
568 | ||
ab68f262 DW |
569 | static const struct file_operations dax_fops = { |
570 | .llseek = noop_llseek, | |
571 | .owner = THIS_MODULE, | |
af69f51e DW |
572 | .open = dax_open, |
573 | .release = dax_release, | |
574 | .get_unmapped_area = dax_get_unmapped_area, | |
575 | .mmap = dax_mmap, | |
ef842302 | 576 | .mmap_supported_flags = MAP_SYNC, |
ab68f262 DW |
577 | }; |
578 | ||
5f0694b3 | 579 | static void dev_dax_release(struct device *dev) |
043a9255 | 580 | { |
5f0694b3 DW |
581 | struct dev_dax *dev_dax = to_dev_dax(dev); |
582 | struct dax_region *dax_region = dev_dax->region; | |
7b6be844 | 583 | struct dax_device *dax_dev = dev_dax->dax_dev; |
043a9255 | 584 | |
bbb3be17 DW |
585 | if (dev_dax->id >= 0) |
586 | ida_simple_remove(&dax_region->ida, dev_dax->id); | |
ebd84d72 | 587 | dax_region_put(dax_region); |
7b6be844 | 588 | put_dax(dax_dev); |
5f0694b3 | 589 | kfree(dev_dax); |
ebd84d72 DW |
590 | } |
591 | ||
5f0694b3 | 592 | static void kill_dev_dax(struct dev_dax *dev_dax) |
ebd84d72 | 593 | { |
7b6be844 DW |
594 | struct dax_device *dax_dev = dev_dax->dax_dev; |
595 | struct inode *inode = dax_inode(dax_dev); | |
596 | ||
597 | kill_dax(dax_dev); | |
598 | unmap_mapping_range(inode->i_mapping, 0, 0, 1); | |
ebd84d72 DW |
599 | } |
600 | ||
5f0694b3 | 601 | static void unregister_dev_dax(void *dev) |
ebd84d72 | 602 | { |
5f0694b3 | 603 | struct dev_dax *dev_dax = to_dev_dax(dev); |
7b6be844 DW |
604 | struct dax_device *dax_dev = dev_dax->dax_dev; |
605 | struct inode *inode = dax_inode(dax_dev); | |
606 | struct cdev *cdev = inode->i_cdev; | |
ebd84d72 | 607 | |
6daaca52 | 608 | dev_dbg(dev, "trace\n"); |
043a9255 | 609 | |
5f0694b3 | 610 | kill_dev_dax(dev_dax); |
7b6be844 | 611 | cdev_device_del(cdev, dev); |
92a3fa07 | 612 | put_device(dev); |
043a9255 DW |
613 | } |
614 | ||
5f0694b3 | 615 | struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, |
bbb3be17 | 616 | int id, struct resource *res, int count) |
043a9255 DW |
617 | { |
618 | struct device *parent = dax_region->dev; | |
7b6be844 | 619 | struct dax_device *dax_dev; |
5f0694b3 | 620 | struct dev_dax *dev_dax; |
7b6be844 | 621 | struct inode *inode; |
043a9255 | 622 | struct device *dev; |
ba09c01d | 623 | struct cdev *cdev; |
43fe51e1 DW |
624 | int rc, i; |
625 | ||
626 | if (!count) | |
627 | return ERR_PTR(-EINVAL); | |
043a9255 | 628 | |
acafe7e3 | 629 | dev_dax = kzalloc(struct_size(dev_dax, res, count), GFP_KERNEL); |
5f0694b3 | 630 | if (!dev_dax) |
d76911ee | 631 | return ERR_PTR(-ENOMEM); |
043a9255 | 632 | |
9d2d01a0 DW |
633 | for (i = 0; i < count; i++) { |
634 | if (!IS_ALIGNED(res[i].start, dax_region->align) | |
635 | || !IS_ALIGNED(resource_size(&res[i]), | |
636 | dax_region->align)) { | |
637 | rc = -EINVAL; | |
638 | break; | |
639 | } | |
5f0694b3 DW |
640 | dev_dax->res[i].start = res[i].start; |
641 | dev_dax->res[i].end = res[i].end; | |
9d2d01a0 DW |
642 | } |
643 | ||
644 | if (i < count) | |
645 | goto err_id; | |
646 | ||
bbb3be17 DW |
647 | if (id < 0) { |
648 | id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL); | |
649 | dev_dax->id = id; | |
650 | if (id < 0) { | |
651 | rc = id; | |
652 | goto err_id; | |
653 | } | |
654 | } else { | |
655 | /* region provider owns @id lifetime */ | |
656 | dev_dax->id = -1; | |
043a9255 DW |
657 | } |
658 | ||
6568b08b DW |
659 | /* |
660 | * No 'host' or dax_operations since there is no access to this | |
661 | * device outside of mmap of the resulting character device. | |
662 | */ | |
663 | dax_dev = alloc_dax(dev_dax, NULL, NULL); | |
43fe51e1 DW |
664 | if (!dax_dev) { |
665 | rc = -ENOMEM; | |
7b6be844 | 666 | goto err_dax; |
43fe51e1 | 667 | } |
3bc52c45 | 668 | |
7b6be844 | 669 | /* from here on we're committed to teardown via dax_dev_release() */ |
5f0694b3 | 670 | dev = &dev_dax->dev; |
ebd84d72 | 671 | device_initialize(dev); |
ba09c01d | 672 | |
7b6be844 DW |
673 | inode = dax_inode(dax_dev); |
674 | cdev = inode->i_cdev; | |
ba09c01d DW |
675 | cdev_init(cdev, &dax_fops); |
676 | cdev->owner = parent->driver->owner; | |
ba09c01d | 677 | |
5f0694b3 | 678 | dev_dax->num_resources = count; |
7b6be844 | 679 | dev_dax->dax_dev = dax_dev; |
5f0694b3 | 680 | dev_dax->region = dax_region; |
ba09c01d DW |
681 | kref_get(&dax_region->kref); |
682 | ||
7b6be844 | 683 | dev->devt = inode->i_rdev; |
ebd84d72 DW |
684 | dev->class = dax_class; |
685 | dev->parent = parent; | |
686 | dev->groups = dax_attribute_groups; | |
5f0694b3 | 687 | dev->release = dev_dax_release; |
bbb3be17 | 688 | dev_set_name(dev, "dax%d.%d", dax_region->id, id); |
92a3fa07 LG |
689 | |
690 | rc = cdev_device_add(cdev, dev); | |
ebd84d72 | 691 | if (rc) { |
5f0694b3 | 692 | kill_dev_dax(dev_dax); |
ebd84d72 | 693 | put_device(dev); |
d76911ee | 694 | return ERR_PTR(rc); |
ebd84d72 | 695 | } |
043a9255 | 696 | |
5f0694b3 | 697 | rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev); |
d76911ee DW |
698 | if (rc) |
699 | return ERR_PTR(rc); | |
700 | ||
5f0694b3 | 701 | return dev_dax; |
043a9255 | 702 | |
7b6be844 | 703 | err_dax: |
bbb3be17 DW |
704 | if (dev_dax->id >= 0) |
705 | ida_simple_remove(&dax_region->ida, dev_dax->id); | |
043a9255 | 706 | err_id: |
5f0694b3 | 707 | kfree(dev_dax); |
043a9255 | 708 | |
d76911ee | 709 | return ERR_PTR(rc); |
043a9255 | 710 | } |
5f0694b3 | 711 | EXPORT_SYMBOL_GPL(devm_create_dev_dax); |
043a9255 | 712 | |
ab68f262 DW |
713 | static int __init dax_init(void) |
714 | { | |
ab68f262 | 715 | dax_class = class_create(THIS_MODULE, "dax"); |
7b6be844 | 716 | return PTR_ERR_OR_ZERO(dax_class); |
ab68f262 DW |
717 | } |
718 | ||
719 | static void __exit dax_exit(void) | |
720 | { | |
721 | class_destroy(dax_class); | |
ab68f262 DW |
722 | } |
723 | ||
724 | MODULE_AUTHOR("Intel Corporation"); | |
725 | MODULE_LICENSE("GPL v2"); | |
726 | subsys_initcall(dax_init); | |
727 | module_exit(dax_exit); |