]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
5db11c21 | 2 | /* |
382f4581 | 3 | * Copyright 2017 Omnibond Systems, L.L.C. |
5db11c21 MM |
4 | */ |
5 | ||
6 | #include "protocol.h" | |
575e9461 MM |
7 | #include "orangefs-kernel.h" |
8 | #include "orangefs-bufmap.h" | |
5db11c21 | 9 | |
/*
 * One chunk ("part") of directory data received from the userspace
 * daemon.  parse_readdir() places this header at the very start of the
 * trailer buffer, so the field order and sizes must not change.  The
 * encoded directory entries follow at an offset of
 * sizeof(struct orangefs_readdir_response_s) from the start of the
 * part (see fill_from_part()).
 */
struct orangefs_dir_part {
	/* Next part in the singly linked list, or NULL for the last one. */
	struct orangefs_dir_part *next;
	/* Bytes of entry data in this part, excluding the response header. */
	size_t len;
};
14 | ||
15 | struct orangefs_dir { | |
16 | __u64 token; | |
17 | struct orangefs_dir_part *part; | |
18 | loff_t end; | |
19 | int error; | |
20 | }; | |
21 | ||
/*
 * A directory position is split into a part number (bits at and above
 * PART_SHIFT) and a byte offset inside that part (bits below it).
 * PART_SIZE and PART_MASK are derived from PART_SHIFT so the three
 * values cannot drift apart.
 */
#define PART_SHIFT 24
#define PART_SIZE (1 << PART_SHIFT)
#define PART_MASK (~(PART_SIZE - 1))
25 | ||
5db11c21 | 26 | /* |
382f4581 MB |
27 | * There can be up to 512 directory entries. Each entry is encoded as |
28 | * follows: | |
29 | * 4 bytes: string size (n) | |
30 | * n bytes: string | |
31 | * 1 byte: trailing zero | |
32 | * padding to 8 bytes | |
33 | * 16 bytes: khandle | |
34 | * padding to 8 bytes | |
382f4581 MB |
35 | * |
36 | * The trailer_buf starts with a struct orangefs_readdir_response_s | |
37 | * which must be skipped to get to the directory data. | |
480e3e53 MB |
38 | * |
39 | * The data which is received from the userspace daemon is termed a | |
40 | * part and is stored in a linked list in case more than one part is | |
41 | * needed for a large directory. | |
42 | * | |
43 | * The position pointer (ctx->pos) encodes the part and offset on which | |
44 | * to begin reading at. Bits above PART_SHIFT encode the part and bits | |
45 | * below PART_SHIFT encode the offset. Parts are stored in a linked | |
46 | * list which grows as data is received from the server. The overhead | |
47 | * associated with managing the list is presumed to be small compared to | |
48 | * the overhead of communicating with the server. | |
49 | * | |
50 | * As data is received from the server, it is placed at the end of the | |
51 | * part list. Data is parsed from the current position as it is needed. | |
52 | * When data is determined to be corrupt, it is either because the | |
53 | * userspace component has sent back corrupt data or because the file | |
54 | * pointer has been moved to an invalid location. Since the two cannot | |
55 | * be differentiated, return EIO. | |
56 | * | |
57 | * Part zero is synthesized to contain `.' and `..'. Part one is the | |
58 | * first part of the part list. | |
5db11c21 | 59 | */ |
5db11c21 | 60 | |
480e3e53 MB |
61 | static int do_readdir(struct orangefs_inode_s *oi, |
62 | struct orangefs_dir *od, struct dentry *dentry, | |
63 | struct orangefs_kernel_op_s *op) | |
382f4581 | 64 | { |
382f4581 | 65 | struct orangefs_readdir_response_s *resp; |
382f4581 MB |
66 | int bufi, r; |
67 | ||
ee3b8d37 | 68 | /* |
382f4581 MB |
69 | * Despite the badly named field, readdir does not use shared |
70 | * memory. However, there are a limited number of readdir | |
71 | * slots, which must be allocated here. This flag simply tells | |
72 | * the op scheduler to return the op here for retry. | |
ee3b8d37 | 73 | */ |
382f4581 MB |
74 | op->uses_shared_memory = 1; |
75 | op->upcall.req.readdir.refn = oi->refn; | |
76 | op->upcall.req.readdir.token = od->token; | |
77 | op->upcall.req.readdir.max_dirent_count = | |
7d221485 | 78 | ORANGEFS_MAX_DIRENT_COUNT_READDIR; |
5db11c21 | 79 | |
382f4581 MB |
80 | again: |
81 | bufi = orangefs_readdir_index_get(); | |
82 | if (bufi < 0) { | |
382f4581 MB |
83 | od->error = bufi; |
84 | return bufi; | |
5db11c21 | 85 | } |
5db11c21 | 86 | |
382f4581 | 87 | op->upcall.req.readdir.buf_index = bufi; |
5db11c21 | 88 | |
382f4581 MB |
89 | r = service_operation(op, "orangefs_readdir", |
90 | get_interruptible_flag(dentry->d_inode)); | |
5db11c21 | 91 | |
382f4581 | 92 | orangefs_readdir_index_put(bufi); |
ee3b8d37 | 93 | |
382f4581 MB |
94 | if (op_state_purged(op)) { |
95 | if (r == -EAGAIN) { | |
96 | vfree(op->downcall.trailer_buf); | |
97 | goto again; | |
98 | } else if (r == -EIO) { | |
99 | vfree(op->downcall.trailer_buf); | |
382f4581 MB |
100 | od->error = r; |
101 | return r; | |
102 | } | |
5db11c21 MM |
103 | } |
104 | ||
382f4581 MB |
105 | if (r < 0) { |
106 | vfree(op->downcall.trailer_buf); | |
382f4581 MB |
107 | od->error = r; |
108 | return r; | |
109 | } else if (op->downcall.status) { | |
110 | vfree(op->downcall.trailer_buf); | |
382f4581 MB |
111 | od->error = op->downcall.status; |
112 | return op->downcall.status; | |
113 | } | |
114 | ||
480e3e53 MB |
115 | /* |
116 | * The maximum size is size per entry times the 512 entries plus | |
117 | * the header. This is well under the limit. | |
118 | */ | |
119 | if (op->downcall.trailer_size > PART_SIZE) { | |
120 | vfree(op->downcall.trailer_buf); | |
121 | od->error = -EIO; | |
122 | return -EIO; | |
123 | } | |
124 | ||
382f4581 MB |
125 | resp = (struct orangefs_readdir_response_s *) |
126 | op->downcall.trailer_buf; | |
127 | od->token = resp->token; | |
480e3e53 MB |
128 | return 0; |
129 | } | |
382f4581 | 130 | |
480e3e53 MB |
131 | static int parse_readdir(struct orangefs_dir *od, |
132 | struct orangefs_kernel_op_s *op) | |
133 | { | |
134 | struct orangefs_dir_part *part, *new; | |
135 | size_t count; | |
136 | ||
137 | count = 1; | |
138 | part = od->part; | |
2f713b5c | 139 | while (part) { |
480e3e53 | 140 | count++; |
2f713b5c MB |
141 | if (part->next) |
142 | part = part->next; | |
143 | else | |
144 | break; | |
382f4581 MB |
145 | } |
146 | ||
480e3e53 MB |
147 | new = (void *)op->downcall.trailer_buf; |
148 | new->next = NULL; | |
149 | new->len = op->downcall.trailer_size - | |
150 | sizeof(struct orangefs_readdir_response_s); | |
151 | if (!od->part) | |
152 | od->part = new; | |
153 | else | |
154 | part->next = new; | |
155 | count++; | |
156 | od->end = count << PART_SHIFT; | |
157 | ||
382f4581 MB |
158 | return 0; |
159 | } | |
9f5e2f7f | 160 | |
480e3e53 MB |
161 | static int orangefs_dir_more(struct orangefs_inode_s *oi, |
162 | struct orangefs_dir *od, struct dentry *dentry) | |
163 | { | |
164 | struct orangefs_kernel_op_s *op; | |
165 | int r; | |
166 | ||
167 | op = op_alloc(ORANGEFS_VFS_OP_READDIR); | |
168 | if (!op) { | |
169 | od->error = -ENOMEM; | |
170 | return -ENOMEM; | |
171 | } | |
172 | r = do_readdir(oi, od, dentry, op); | |
173 | if (r) { | |
174 | od->error = r; | |
175 | goto out; | |
176 | } | |
177 | r = parse_readdir(od, op); | |
178 | if (r) { | |
179 | od->error = r; | |
180 | goto out; | |
181 | } | |
182 | ||
183 | od->error = 0; | |
184 | out: | |
185 | op_release(op); | |
186 | return od->error; | |
187 | } | |
188 | ||
189 | static int fill_from_part(struct orangefs_dir_part *part, | |
382f4581 MB |
190 | struct dir_context *ctx) |
191 | { | |
480e3e53 | 192 | const int offset = sizeof(struct orangefs_readdir_response_s); |
382f4581 MB |
193 | struct orangefs_khandle *khandle; |
194 | __u32 *len, padlen; | |
72f66b83 | 195 | loff_t i; |
382f4581 | 196 | char *s; |
480e3e53 MB |
197 | i = ctx->pos & ~PART_MASK; |
198 | ||
199 | /* The file offset from userspace is too large. */ | |
200 | if (i > part->len) | |
bf15ba7c MB |
201 | return 1; |
202 | ||
203 | /* | |
204 | * If the seek pointer is positioned just before an entry it | |
205 | * should find the next entry. | |
206 | */ | |
207 | if (i % 8) | |
208 | i = i + (8 - i%8)%8; | |
480e3e53 MB |
209 | |
210 | while (i < part->len) { | |
211 | if (part->len < i + sizeof *len) | |
bf15ba7c | 212 | break; |
480e3e53 | 213 | len = (void *)part + offset + i; |
382f4581 MB |
214 | /* |
215 | * len is the size of the string itself. padlen is the | |
216 | * total size of the encoded string. | |
217 | */ | |
218 | padlen = (sizeof *len + *len + 1) + | |
480e3e53 MB |
219 | (8 - (sizeof *len + *len + 1)%8)%8; |
220 | if (part->len < i + padlen + sizeof *khandle) | |
bf15ba7c | 221 | goto next; |
480e3e53 | 222 | s = (void *)part + offset + i + sizeof *len; |
382f4581 | 223 | if (s[*len] != 0) |
bf15ba7c | 224 | goto next; |
480e3e53 | 225 | khandle = (void *)part + offset + i + padlen; |
382f4581 | 226 | if (!dir_emit(ctx, s, *len, |
480e3e53 MB |
227 | orangefs_khandle_to_ino(khandle), |
228 | DT_UNKNOWN)) | |
382f4581 | 229 | return 0; |
72f66b83 MB |
230 | i += padlen + sizeof *khandle; |
231 | i = i + (8 - i%8)%8; | |
480e3e53 MB |
232 | BUG_ON(i > part->len); |
233 | ctx->pos = (ctx->pos & PART_MASK) | i; | |
bf15ba7c MB |
234 | continue; |
235 | next: | |
236 | i += 8; | |
480e3e53 MB |
237 | } |
238 | return 1; | |
239 | } | |
240 | ||
241 | static int orangefs_dir_fill(struct orangefs_inode_s *oi, | |
242 | struct orangefs_dir *od, struct dentry *dentry, | |
243 | struct dir_context *ctx) | |
244 | { | |
245 | struct orangefs_dir_part *part; | |
246 | size_t count; | |
247 | ||
248 | count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1; | |
249 | ||
250 | part = od->part; | |
251 | while (part->next && count) { | |
252 | count--; | |
253 | part = part->next; | |
254 | } | |
255 | /* This means the userspace file offset is invalid. */ | |
256 | if (count) { | |
257 | od->error = -EIO; | |
258 | return -EIO; | |
259 | } | |
260 | ||
261 | while (part && part->len) { | |
262 | int r; | |
263 | r = fill_from_part(part, ctx); | |
264 | if (r < 0) { | |
265 | od->error = r; | |
266 | return r; | |
267 | } else if (r == 0) { | |
268 | /* Userspace buffer is full. */ | |
269 | break; | |
270 | } else { | |
271 | /* | |
272 | * The part ran out of data. Move to the next | |
273 | * part. */ | |
274 | ctx->pos = (ctx->pos & PART_MASK) + | |
275 | (1 << PART_SHIFT); | |
276 | part = part->next; | |
277 | } | |
382f4581 | 278 | } |
382f4581 | 279 | return 0; |
382f4581 | 280 | } |
5db11c21 | 281 | |
942835d6 MB |
282 | static loff_t orangefs_dir_llseek(struct file *file, loff_t offset, |
283 | int whence) | |
284 | { | |
285 | struct orangefs_dir *od = file->private_data; | |
286 | /* | |
287 | * Delete the stored data so userspace sees new directory | |
288 | * entries. | |
289 | */ | |
290 | if (!whence && offset < od->end) { | |
291 | struct orangefs_dir_part *part = od->part; | |
292 | while (part) { | |
293 | struct orangefs_dir_part *next = part->next; | |
294 | vfree(part); | |
295 | part = next; | |
296 | } | |
297 | od->token = ORANGEFS_ITERATE_START; | |
298 | od->part = NULL; | |
299 | od->end = 1 << PART_SHIFT; | |
300 | } | |
301 | return default_llseek(file, offset, whence); | |
302 | } | |
303 | ||
382f4581 MB |
304 | static int orangefs_dir_iterate(struct file *file, |
305 | struct dir_context *ctx) | |
306 | { | |
307 | struct orangefs_inode_s *oi; | |
308 | struct orangefs_dir *od; | |
309 | struct dentry *dentry; | |
310 | int r; | |
5db11c21 | 311 | |
382f4581 MB |
312 | dentry = file->f_path.dentry; |
313 | oi = ORANGEFS_I(dentry->d_inode); | |
314 | od = file->private_data; | |
5db11c21 | 315 | |
382f4581 MB |
316 | if (od->error) |
317 | return od->error; | |
5db11c21 | 318 | |
382f4581 MB |
319 | if (ctx->pos == 0) { |
320 | if (!dir_emit_dot(file, ctx)) | |
321 | return 0; | |
322 | ctx->pos++; | |
5db11c21 | 323 | } |
382f4581 MB |
324 | if (ctx->pos == 1) { |
325 | if (!dir_emit_dotdot(file, ctx)) | |
326 | return 0; | |
480e3e53 | 327 | ctx->pos = 1 << PART_SHIFT; |
5db11c21 MM |
328 | } |
329 | ||
480e3e53 MB |
330 | /* |
331 | * The seek position is in the first synthesized part but is not | |
332 | * valid. | |
333 | */ | |
334 | if ((ctx->pos & PART_MASK) == 0) | |
335 | return -EIO; | |
336 | ||
382f4581 MB |
337 | r = 0; |
338 | ||
72f66b83 MB |
339 | /* |
340 | * Must read more if the user has sought past what has been read | |
341 | * so far. Stop a user who has sought past the end. | |
342 | */ | |
7b796ae3 | 343 | while (od->token != ORANGEFS_ITERATE_END && |
480e3e53 | 344 | ctx->pos > od->end) { |
72f66b83 MB |
345 | r = orangefs_dir_more(oi, od, dentry); |
346 | if (r) | |
347 | return r; | |
348 | } | |
7b796ae3 | 349 | if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end) |
72f66b83 | 350 | return -EIO; |
72f66b83 MB |
351 | |
352 | /* Then try to fill if there's any left in the buffer. */ | |
480e3e53 | 353 | if (ctx->pos < od->end) { |
382f4581 MB |
354 | r = orangefs_dir_fill(oi, od, dentry, ctx); |
355 | if (r) | |
356 | return r; | |
5db11c21 MM |
357 | } |
358 | ||
72f66b83 | 359 | /* Finally get some more and try to fill. */ |
7b796ae3 | 360 | if (od->token != ORANGEFS_ITERATE_END) { |
382f4581 MB |
361 | r = orangefs_dir_more(oi, od, dentry); |
362 | if (r) | |
363 | return r; | |
364 | r = orangefs_dir_fill(oi, od, dentry, ctx); | |
5db11c21 MM |
365 | } |
366 | ||
382f4581 | 367 | return r; |
5db11c21 MM |
368 | } |
369 | ||
8bb8aefd | 370 | static int orangefs_dir_open(struct inode *inode, struct file *file) |
5db11c21 | 371 | { |
382f4581 MB |
372 | struct orangefs_dir *od; |
373 | file->private_data = kmalloc(sizeof(struct orangefs_dir), | |
374 | GFP_KERNEL); | |
5db11c21 MM |
375 | if (!file->private_data) |
376 | return -ENOMEM; | |
382f4581 | 377 | od = file->private_data; |
7b796ae3 | 378 | od->token = ORANGEFS_ITERATE_START; |
480e3e53 MB |
379 | od->part = NULL; |
380 | od->end = 1 << PART_SHIFT; | |
382f4581 | 381 | od->error = 0; |
5db11c21 MM |
382 | return 0; |
383 | } | |
384 | ||
8bb8aefd | 385 | static int orangefs_dir_release(struct inode *inode, struct file *file) |
5db11c21 | 386 | { |
382f4581 | 387 | struct orangefs_dir *od = file->private_data; |
480e3e53 | 388 | struct orangefs_dir_part *part = od->part; |
480e3e53 MB |
389 | while (part) { |
390 | struct orangefs_dir_part *next = part->next; | |
391 | vfree(part); | |
392 | part = next; | |
393 | } | |
382f4581 | 394 | kfree(od); |
5db11c21 MM |
395 | return 0; |
396 | } | |
397 | ||
8bb8aefd | 398 | const struct file_operations orangefs_dir_operations = { |
942835d6 | 399 | .llseek = orangefs_dir_llseek, |
5db11c21 | 400 | .read = generic_read_dir, |
382f4581 | 401 | .iterate = orangefs_dir_iterate, |
8bb8aefd | 402 | .open = orangefs_dir_open, |
382f4581 | 403 | .release = orangefs_dir_release |
5db11c21 | 404 | }; |