]>
Commit | Line | Data |
---|---|---|
a8c879a7 AG |
1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | |
3 | * | |
4 | * This software is available to you under a choice of one of two | |
5 | * licenses. You may choose to be licensed under the terms of the GNU | |
6 | * General Public License (GPL) Version 2, available from the file | |
7 | * COPYING in the main directory of this source tree, or the | |
8 | * OpenIB.org BSD license below: | |
9 | * | |
10 | * Redistribution and use in source and binary forms, with or | |
11 | * without modification, are permitted provided that the following | |
12 | * conditions are met: | |
13 | * | |
14 | * - Redistributions of source code must retain the above | |
15 | * copyright notice, this list of conditions and the following | |
16 | * disclaimer. | |
17 | * | |
18 | * - Redistributions in binary form must reproduce the above | |
19 | * copyright notice, this list of conditions and the following | |
20 | * disclaimer in the documentation and/or other materials | |
21 | * provided with the distribution. | |
22 | * | |
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
30 | * SOFTWARE. | |
31 | * | |
32 | */ | |
33 | #include <linux/percpu.h> | |
34 | #include <linux/seq_file.h> | |
35 | #include <linux/proc_fs.h> | |
36 | ||
37 | #include "rds.h" | |
38 | ||
39 | /* | |
40 | * This file implements a getsockopt() call which copies a set of fixed | |
41 | * sized structs into a user-specified buffer as a means of providing | |
42 | * read-only information about RDS. | |
43 | * | |
44 | * For a given information source there are a given number of fixed sized | |
45 | * structs at a given time. The structs are only copied if the user-specified | |
46 | * buffer is big enough. The destination pages that make up the buffer | |
47 | * are pinned for the duration of the copy. | |
48 | * | |
49 | * This gives us the following benefits: | |
50 | * | |
51 | * - simple implementation, no copy "position" across multiple calls | |
52 | * - consistent snapshot of an info source | |
53 | * - atomic copy works well with whatever locking info source has | |
54 | * - one portable tool to get rds info across implementations | |
55 | * - long-lived tool can get info without allocating | |
56 | * | |
57 | * at the following costs: | |
58 | * | |
59 | * - info source copy must be pinned, may be "large" | |
60 | */ | |
61 | ||
/*
 * Cursor over the destination pages of one info snapshot.  It is set up by
 * rds_info_getsockopt() and advanced by rds_info_copy().
 */
struct rds_info_iterator {
	/* page currently being filled; stepped forward as pages fill up */
	struct page **pages;
	/* atomic mapping of *pages, or NULL while no mapping is held */
	void *addr;
	/* byte offset of the next write within the current page */
	unsigned long offset;
};
67 | ||
68 | static DEFINE_SPINLOCK(rds_info_lock); | |
69 | static rds_info_func rds_info_funcs[RDS_INFO_LAST - RDS_INFO_FIRST + 1]; | |
70 | ||
71 | void rds_info_register_func(int optname, rds_info_func func) | |
72 | { | |
73 | int offset = optname - RDS_INFO_FIRST; | |
74 | ||
75 | BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); | |
76 | ||
77 | spin_lock(&rds_info_lock); | |
78 | BUG_ON(rds_info_funcs[offset] != NULL); | |
79 | rds_info_funcs[offset] = func; | |
80 | spin_unlock(&rds_info_lock); | |
81 | } | |
82 | ||
83 | void rds_info_deregister_func(int optname, rds_info_func func) | |
84 | { | |
85 | int offset = optname - RDS_INFO_FIRST; | |
86 | ||
87 | BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); | |
88 | ||
89 | spin_lock(&rds_info_lock); | |
90 | BUG_ON(rds_info_funcs[offset] != func); | |
91 | rds_info_funcs[offset] = NULL; | |
92 | spin_unlock(&rds_info_lock); | |
93 | } | |
94 | ||
95 | /* | |
96 | * Typically we hold an atomic kmap across multiple rds_info_copy() calls | |
97 | * because the kmap is so expensive. This must be called before using blocking | |
98 | * operations while holding the mapping and as the iterator is torn down. | |
99 | */ | |
100 | void rds_info_iter_unmap(struct rds_info_iterator *iter) | |
101 | { | |
102 | if (iter->addr != NULL) { | |
103 | kunmap_atomic(iter->addr, KM_USER0); | |
104 | iter->addr = NULL; | |
105 | } | |
106 | } | |
107 | ||
108 | /* | |
109 | * get_user_pages() called flush_dcache_page() on the pages for us. | |
110 | */ | |
111 | void rds_info_copy(struct rds_info_iterator *iter, void *data, | |
112 | unsigned long bytes) | |
113 | { | |
114 | unsigned long this; | |
115 | ||
116 | while (bytes) { | |
117 | if (iter->addr == NULL) | |
118 | iter->addr = kmap_atomic(*iter->pages, KM_USER0); | |
119 | ||
120 | this = min(bytes, PAGE_SIZE - iter->offset); | |
121 | ||
122 | rdsdebug("page %p addr %p offset %lu this %lu data %p " | |
123 | "bytes %lu\n", *iter->pages, iter->addr, | |
124 | iter->offset, this, data, bytes); | |
125 | ||
126 | memcpy(iter->addr + iter->offset, data, this); | |
127 | ||
128 | data += this; | |
129 | bytes -= this; | |
130 | iter->offset += this; | |
131 | ||
132 | if (iter->offset == PAGE_SIZE) { | |
133 | kunmap_atomic(iter->addr, KM_USER0); | |
134 | iter->addr = NULL; | |
135 | iter->offset = 0; | |
136 | iter->pages++; | |
137 | } | |
138 | } | |
139 | } | |
140 | ||
141 | /* | |
142 | * @optval points to the userspace buffer that the information snapshot | |
143 | * will be copied into. | |
144 | * | |
145 | * @optlen on input is the size of the buffer in userspace. @optlen | |
146 | * on output is the size of the requested snapshot in bytes. | |
147 | * | |
148 | * This function returns -errno if there is a failure, particularly -ENOSPC | |
149 | * if the given userspace buffer was not large enough to fit the snapshot. | |
150 | * On success it returns the positive number of bytes of each array element | |
151 | * in the snapshot. | |
152 | */ | |
153 | int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, | |
154 | int __user *optlen) | |
155 | { | |
156 | struct rds_info_iterator iter; | |
157 | struct rds_info_lengths lens; | |
158 | unsigned long nr_pages = 0; | |
159 | unsigned long start; | |
160 | unsigned long i; | |
161 | rds_info_func func; | |
162 | struct page **pages = NULL; | |
163 | int ret; | |
164 | int len; | |
165 | int total; | |
166 | ||
167 | if (get_user(len, optlen)) { | |
168 | ret = -EFAULT; | |
169 | goto out; | |
170 | } | |
171 | ||
172 | /* check for all kinds of wrapping and the like */ | |
173 | start = (unsigned long)optval; | |
174 | if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) { | |
175 | ret = -EINVAL; | |
176 | goto out; | |
177 | } | |
178 | ||
179 | /* a 0 len call is just trying to probe its length */ | |
180 | if (len == 0) | |
181 | goto call_func; | |
182 | ||
183 | nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK)) | |
184 | >> PAGE_SHIFT; | |
185 | ||
186 | pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); | |
187 | if (pages == NULL) { | |
188 | ret = -ENOMEM; | |
189 | goto out; | |
190 | } | |
191 | down_read(¤t->mm->mmap_sem); | |
192 | ret = get_user_pages(current, current->mm, start, nr_pages, 1, 0, | |
193 | pages, NULL); | |
194 | up_read(¤t->mm->mmap_sem); | |
195 | if (ret != nr_pages) { | |
196 | if (ret > 0) | |
197 | nr_pages = ret; | |
198 | else | |
199 | nr_pages = 0; | |
200 | ret = -EAGAIN; /* XXX ? */ | |
201 | goto out; | |
202 | } | |
203 | ||
204 | rdsdebug("len %d nr_pages %lu\n", len, nr_pages); | |
205 | ||
206 | call_func: | |
207 | func = rds_info_funcs[optname - RDS_INFO_FIRST]; | |
208 | if (func == NULL) { | |
209 | ret = -ENOPROTOOPT; | |
210 | goto out; | |
211 | } | |
212 | ||
213 | iter.pages = pages; | |
214 | iter.addr = NULL; | |
215 | iter.offset = start & (PAGE_SIZE - 1); | |
216 | ||
217 | func(sock, len, &iter, &lens); | |
218 | BUG_ON(lens.each == 0); | |
219 | ||
220 | total = lens.nr * lens.each; | |
221 | ||
222 | rds_info_iter_unmap(&iter); | |
223 | ||
224 | if (total > len) { | |
225 | len = total; | |
226 | ret = -ENOSPC; | |
227 | } else { | |
228 | len = total; | |
229 | ret = lens.each; | |
230 | } | |
231 | ||
232 | if (put_user(len, optlen)) | |
233 | ret = -EFAULT; | |
234 | ||
235 | out: | |
236 | for (i = 0; pages != NULL && i < nr_pages; i++) | |
237 | put_page(pages[i]); | |
238 | kfree(pages); | |
239 | ||
240 | return ret; | |
241 | } |