2 * (C) 2001 Clemson University and The University of Chicago
4 * See COPYING in top-level directory.
7 #include "pvfs2-kernel.h"
8 #include "pvfs2-bufmap.h"
/*
 * Wait queue that pvfs_bufmap_initialize() wakes (wake_up_interruptible)
 * once a new bufmap has been installed.
 */
10 DECLARE_WAIT_QUEUE_HEAD(pvfs2_bufmap_init_waitq);
/*
 * State of the shared buffer map.
 *
 * NOTE(review): only part of this definition is visible here. Members that
 * the rest of the file uses (refcnt, desc_size, desc_shift, desc_count,
 * total_size, page_count) and the end of the declaration are not shown.
 */
12 static struct pvfs2_bufmap {
/* page pointers for the mapped buffer; released in pvfs2_bufmap_unmap() */
21 struct page **page_array;
/* one descriptor per buffer slot; each points into page_array */
22 struct pvfs_bufmap_desc *desc_array;
24 /* array to track usage of buffer descriptors */
25 int *buffer_index_array;
/* held by the slot helpers while they scan/update buffer_index_array */
26 spinlock_t buffer_index_lock;
28 /* array to track usage of buffer descriptors for readdir */
29 int readdir_index_array[PVFS2_READDIR_DEFAULT_DESC_COUNT];
/* held by the slot helpers while they scan/update readdir_index_array */
30 spinlock_t readdir_index_lock;
/*
 * Held while the global __pvfs2_bufmap pointer is read in
 * pvfs2_bufmap_ref() and while it is written in pvfs_bufmap_initialize()
 * and pvfs2_bufmap_unref().
 */
33 static DEFINE_SPINLOCK(pvfs2_bufmap_lock);
36 pvfs2_bufmap_unmap(struct pvfs2_bufmap *bufmap)
40 for (i = 0; i < bufmap->page_count; i++)
41 page_cache_release(bufmap->page_array[i]);
45 pvfs2_bufmap_free(struct pvfs2_bufmap *bufmap)
47 kfree(bufmap->page_array);
48 kfree(bufmap->desc_array);
49 kfree(bufmap->buffer_index_array);
53 struct pvfs2_bufmap *pvfs2_bufmap_ref(void)
55 struct pvfs2_bufmap *bufmap = NULL;
57 spin_lock(&pvfs2_bufmap_lock);
59 bufmap = __pvfs2_bufmap;
60 atomic_inc(&bufmap->refcnt);
62 spin_unlock(&pvfs2_bufmap_lock);
66 void pvfs2_bufmap_unref(struct pvfs2_bufmap *bufmap)
68 if (atomic_dec_and_lock(&bufmap->refcnt, &pvfs2_bufmap_lock)) {
69 __pvfs2_bufmap = NULL;
70 spin_unlock(&pvfs2_bufmap_lock);
72 pvfs2_bufmap_unmap(bufmap);
73 pvfs2_bufmap_free(bufmap);
77 inline int pvfs_bufmap_size_query(void)
79 struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
80 int size = bufmap ? bufmap->desc_size : 0;
82 pvfs2_bufmap_unref(bufmap);
86 inline int pvfs_bufmap_shift_query(void)
88 struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
89 int shift = bufmap ? bufmap->desc_shift : 0;
91 pvfs2_bufmap_unref(bufmap);
/* wait queue handed to the slot helpers by pvfs_bufmap_get()/pvfs_bufmap_put() */
95 static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
/* wait queue handed to the slot helpers by readdir_index_get()/readdir_index_put() */
96 static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
101 * If bufmap_init is 1, then the shared memory system, including the
102 * buffer_index_array, is available. Otherwise, it is not.
104 * returns the value of bufmap_init
106 int get_bufmap_init(void)
108 return __pvfs2_bufmap ? 1 : 0;
/*
 * pvfs2_bufmap_alloc()
 *
 * Allocates a zeroed bufmap and its tracking arrays, sized from the
 * total_size, count and size fields of @user_desc. The refcnt starts at 1.
 * No user pages are touched here.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (the check after kzalloc(), the left-hand sides receiving two of
 * the kcalloc() results, the return statements and some labels), so the
 * exact return values cannot be confirmed from here.
 */
112 static struct pvfs2_bufmap *
113 pvfs2_bufmap_alloc(struct PVFS_dev_map_desc *user_desc)
115 struct pvfs2_bufmap *bufmap;
117 bufmap = kzalloc(sizeof(*bufmap), GFP_KERNEL);
/* the creator holds the initial reference */
121 atomic_set(&bufmap->refcnt, 1);
122 bufmap->total_size = user_desc->total_size;
123 bufmap->desc_count = user_desc->count;
124 bufmap->desc_size = user_desc->size;
/* NOTE(review): ilog2() of a zero desc_size is not meaningful - confirm callers reject size == 0 */
125 bufmap->desc_shift = ilog2(bufmap->desc_size);
/* one int usage flag per descriptor */
127 spin_lock_init(&bufmap->buffer_index_lock);
128 bufmap->buffer_index_array =
129 kcalloc(bufmap->desc_count, sizeof(int), GFP_KERNEL);
130 if (!bufmap->buffer_index_array) {
131 gossip_err("pvfs2: could not allocate %d buffer indices\n",
133 goto out_free_bufmap;
135 spin_lock_init(&bufmap->readdir_index_lock);
138 kcalloc(bufmap->desc_count, sizeof(struct pvfs_bufmap_desc),
140 if (!bufmap->desc_array) {
141 gossip_err("pvfs2: could not allocate %d descriptors\n",
143 goto out_free_index_array;
/* number of whole pages covered by the user buffer */
146 bufmap->page_count = bufmap->total_size / PAGE_SIZE;
148 /* allocate storage to track our page mappings */
150 kcalloc(bufmap->page_count, sizeof(struct page *), GFP_KERNEL);
151 if (!bufmap->page_array)
152 goto out_free_desc_array;
/* error unwinding: free in reverse order of allocation */
157 kfree(bufmap->desc_array);
158 out_free_index_array:
159 kfree(bufmap->buffer_index_array);
/*
 * pvfs2_bufmap_map()
 *
 * Pins the user pages starting at user_desc->ptr into bufmap->page_array
 * and fills bufmap->desc_array so that each descriptor covers
 * desc_size / PAGE_SIZE consecutive pages.
 *
 * NOTE(review): the return type, most get_user_pages() arguments and the
 * return statements are not visible in this excerpt.
 * NOTE(review): "¤t" on the down_write()/up_write() lines looks like a
 * mis-encoded "&current" (HTML entity damage) - confirm against the
 * pristine file before building.
 */
167 pvfs2_bufmap_map(struct pvfs2_bufmap *bufmap,
168 struct PVFS_dev_map_desc *user_desc)
170 int pages_per_desc = bufmap->desc_size / PAGE_SIZE;
171 int offset = 0, ret, i;
/* mmap_sem is held for writing around the get_user_pages() call */
174 down_write(¤t->mm->mmap_sem);
175 ret = get_user_pages(current,
177 (unsigned long)user_desc->ptr,
183 up_write(¤t->mm->mmap_sem);
/* partial pin: log it and give back the pages that were obtained */
188 if (ret != bufmap->page_count) {
189 gossip_err("pvfs2 error: asked for %d pages, only got %d.\n",
190 bufmap->page_count, ret);
192 for (i = 0; i < ret; i++) {
193 SetPageError(bufmap->page_array[i]);
194 page_cache_release(bufmap->page_array[i]);
200 * ideally we want to get kernel space pointers for each page, but
201 * we can't kmap that many pages at once if highmem is being used.
202 * so instead, we just kmap/kunmap the page address each time the
205 for (i = 0; i < bufmap->page_count; i++)
206 flush_dcache_page(bufmap->page_array[i]);
208 /* build a list of available descriptors */
209 for (offset = 0, i = 0; i < bufmap->desc_count; i++) {
/* descriptor i owns pages [offset, offset + pages_per_desc) */
210 bufmap->desc_array[i].page_array = &bufmap->page_array[offset];
211 bufmap->desc_array[i].array_count = pages_per_desc;
/* user-space address of the first byte of descriptor i */
212 bufmap->desc_array[i].uaddr =
213 (user_desc->ptr + (i * pages_per_desc * PAGE_SIZE));
214 offset += pages_per_desc;
/*
 * Validates @user_desc (page alignment of ptr and of ptr + total_size,
 * total_size == size * count, size a multiple of PAGE_SIZE), then
 * allocates a bufmap, maps the user pages into it and installs it as
 * __pvfs2_bufmap under pvfs2_bufmap_lock. Installation fails if a bufmap
 * is already present; in that case the new one is unmapped and freed.
 *
 * NOTE(review): the error codes, the check after pvfs2_bufmap_alloc(), the
 * cleanup labels and the return statements are not visible in this excerpt.
 * NOTE(review): size == 0 passes every visible check - confirm whether it
 * is rejected elsewhere.
 */
221 * pvfs_bufmap_initialize()
223 * initializes the mapped buffer interface
225 * returns 0 on success, -errno on failure
227 int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc)
229 struct pvfs2_bufmap *bufmap;
232 gossip_debug(GOSSIP_BUFMAP_DEBUG,
233 "pvfs_bufmap_initialize: called (ptr ("
234 "%p) sz (%d) cnt(%d).\n",
240 * sanity check alignment and size of buffer that caller wants to
/* start of the buffer must sit on a page boundary */
243 if (PAGE_ALIGN((unsigned long)user_desc->ptr) !=
244 (unsigned long)user_desc->ptr) {
245 gossip_err("pvfs2 error: memory alignment (front). %p\n",
/* end of the buffer must sit on a page boundary as well */
250 if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size))
251 != (unsigned long)(user_desc->ptr + user_desc->total_size)) {
252 gossip_err("pvfs2 error: memory alignment (back).(%p + %d)\n",
254 user_desc->total_size);
/* the three size fields must agree with each other */
258 if (user_desc->total_size != (user_desc->size * user_desc->count)) {
259 gossip_err("pvfs2 error: user provided an oddly sized buffer: (%d, %d, %d)\n",
260 user_desc->total_size,
/* each descriptor must cover whole pages */
266 if ((user_desc->size % PAGE_SIZE) != 0) {
267 gossip_err("pvfs2 error: bufmap size not page size divisible (%d).\n",
273 bufmap = pvfs2_bufmap_alloc(user_desc);
277 ret = pvfs2_bufmap_map(bufmap, user_desc);
279 goto out_free_bufmap;
/* publish the new bufmap unless one is already installed */
282 spin_lock(&pvfs2_bufmap_lock);
283 if (__pvfs2_bufmap) {
284 spin_unlock(&pvfs2_bufmap_lock);
285 gossip_err("pvfs2: error: bufmap already initialized.\n");
287 goto out_unmap_bufmap;
289 __pvfs2_bufmap = bufmap;
290 spin_unlock(&pvfs2_bufmap_lock);
293 * If there are operations in pvfs2_bufmap_init_waitq, wake them up.
294 * This scenario occurs when the client-core is restarted and I/O
295 * requests in the in-progress or waiting tables are restarted. I/O
296 * requests cannot be restarted until the shared memory system is
297 * completely re-initialized, so we put the I/O requests in this
298 * waitq until initialization has completed. NOTE: the I/O requests
299 * are also on a timer, so they don't wait forever just in case the
300 * client-core doesn't come back up.
302 wake_up_interruptible(&pvfs2_bufmap_init_waitq);
304 gossip_debug(GOSSIP_BUFMAP_DEBUG,
305 "pvfs_bufmap_initialize: exiting normally\n");
/* error unwinding: release pinned pages first, then the allocations */
309 pvfs2_bufmap_unmap(bufmap);
311 pvfs2_bufmap_free(bufmap);
317 * pvfs_bufmap_finalize()
319 * shuts down the mapped buffer interface and releases any resources
324 void pvfs_bufmap_finalize(void)
326 gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_finalize: called\n");
327 BUG_ON(!__pvfs2_bufmap);
328 pvfs2_bufmap_unref(__pvfs2_bufmap);
329 gossip_debug(GOSSIP_BUFMAP_DEBUG,
330 "pvfs2_bufmap_finalize: exiting normally\n");
/*
 * NOTE(review): these are the only members of struct slot_args visible in
 * this excerpt; the code below also reads slot_count and slot_array.
 */
/* lock taken around every scan/update of slot_array */
336 spinlock_t *slot_lock;
/* wait queue on which tasks sleep until a slot is put back */
337 wait_queue_head_t *slot_wq;
/*
 * wait_for_a_slot()
 *
 * Queues the current task as an exclusive waiter on slargs->slot_wq and
 * looks, under slargs->slot_lock, for an entry of slargs->slot_array that
 * is 0, marking it 1 when found. When none is free and no signal is
 * pending it sleeps for up to slot_timeout_secs seconds before looking
 * again. The task is put back to TASK_RUNNING and removed from the wait
 * queue before leaving.
 *
 * NOTE(review): the loop construct, the store through @buffer_index and
 * the return values are not visible in this excerpt.
 */
340 static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index)
344 DECLARE_WAITQUEUE(my_wait, current);
347 add_wait_queue_exclusive(slargs->slot_wq, &my_wait);
/* set the task state before checking, so a wake-up in between is not lost */
350 set_current_state(TASK_INTERRUPTIBLE);
353 * check for available desc, slot_lock is the appropriate
356 spin_lock(slargs->slot_lock);
357 for (i = 0; i < slargs->slot_count; i++)
358 if (slargs->slot_array[i] == 0) {
359 slargs->slot_array[i] = 1;
364 spin_unlock(slargs->slot_lock);
366 /* if we acquired a buffer, then break out of while */
370 if (!signal_pending(current)) {
372 MSECS_TO_JIFFIES(1000 * slot_timeout_secs);
373 gossip_debug(GOSSIP_BUFMAP_DEBUG,
374 "[BUFMAP]: waiting %d "
375 "seconds for a slot\n",
/* schedule_timeout() returning 0 means the whole timeout elapsed */
377 if (!schedule_timeout(timeout)) {
378 gossip_debug(GOSSIP_BUFMAP_DEBUG,
379 "*** wait_for_a_slot timed out\n");
383 gossip_debug(GOSSIP_BUFMAP_DEBUG,
384 "[BUFMAP]: woken up by a slot becoming available.\n");
388 gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2: %s interrupted.\n",
394 set_current_state(TASK_RUNNING);
395 remove_wait_queue(slargs->slot_wq, &my_wait);
399 static void put_back_slot(struct slot_args *slargs, int buffer_index)
401 /* slot_lock is the appropriate index_lock */
402 spin_lock(slargs->slot_lock);
403 if (buffer_index < 0 || buffer_index >= slargs->slot_count) {
404 spin_unlock(slargs->slot_lock);
408 /* put the desc back on the queue */
409 slargs->slot_array[buffer_index] = 0;
410 spin_unlock(slargs->slot_lock);
412 /* wake up anyone who may be sleeping on the queue */
413 wake_up_interruptible(slargs->slot_wq);
/*
 * Takes a reference on the installed bufmap and claims one entry of its
 * buffer_index_array through wait_for_a_slot(), sleeping on bufmap_waitq
 * if none is free.
 *
 * NOTE(review): the NULL check on the bufmap, the condition guarding the
 * pvfs2_bufmap_unref() call, the store through @mapp and the return
 * statements are not visible in this excerpt.
 */
419 * gets a free mapped buffer descriptor, will sleep until one becomes
420 * available if necessary
422 * returns 0 on success, -errno on failure
424 int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index)
426 struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
427 struct slot_args slargs;
431 gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
/* describe the I/O buffer pool to the generic slot helper */
435 slargs.slot_count = bufmap->desc_count;
436 slargs.slot_array = bufmap->buffer_index_array;
437 slargs.slot_lock = &bufmap->buffer_index_lock;
438 slargs.slot_wq = &bufmap_waitq;
439 ret = wait_for_a_slot(&slargs, buffer_index);
441 pvfs2_bufmap_unref(bufmap);
449 * returns a mapped buffer descriptor to the collection
453 void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index)
455 struct slot_args slargs;
457 slargs.slot_count = bufmap->desc_count;
458 slargs.slot_array = bufmap->buffer_index_array;
459 slargs.slot_lock = &bufmap->buffer_index_lock;
460 slargs.slot_wq = &bufmap_waitq;
461 put_back_slot(&slargs, buffer_index);
462 pvfs2_bufmap_unref(bufmap);
/*
 * Takes a reference on the installed bufmap and claims one of the
 * PVFS2_READDIR_DEFAULT_DESC_COUNT entries of its readdir_index_array
 * through wait_for_a_slot(), sleeping on readdir_waitq if none is free.
 *
 * NOTE(review): the NULL check on the bufmap, the condition guarding the
 * pvfs2_bufmap_unref() call, the store through @mapp and the return
 * statements are not visible in this excerpt.
 */
466 * readdir_index_get()
468 * gets a free descriptor, will sleep until one becomes
469 * available if necessary.
470 * Although the readdir buffers are not mapped into kernel space
471 * we could do that at a later point of time. Regardless, these
472 * indices are used by the client-core.
474 * returns 0 on success, -errno on failure
476 int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index)
478 struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
479 struct slot_args slargs;
483 gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
/* describe the readdir pool to the generic slot helper */
487 slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT;
488 slargs.slot_array = bufmap->readdir_index_array;
489 slargs.slot_lock = &bufmap->readdir_index_lock;
490 slargs.slot_wq = &readdir_waitq;
491 ret = wait_for_a_slot(&slargs, buffer_index);
493 pvfs2_bufmap_unref(bufmap);
498 void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index)
500 struct slot_args slargs;
502 slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT;
503 slargs.slot_array = bufmap->readdir_index_array;
504 slargs.slot_lock = &bufmap->readdir_index_lock;
505 slargs.slot_wq = &readdir_waitq;
506 put_back_slot(&slargs, buffer_index);
507 pvfs2_bufmap_unref(bufmap);
/*
 * pvfs_bufmap_copy_from_iovec()
 *
 * Copies data from @iter into the pages of descriptor @buffer_index, one
 * page (up to PAGE_SIZE bytes, from offset 0) at a time.
 *
 * returns 0 when size has reached zero, -EFAULT otherwise
 *
 * NOTE(review): the remaining parameters, the local declarations and the
 * statement that decrements size are not visible in this excerpt.
 * NOTE(review): in the visible lines the page index i is not bounded by
 * the descriptor's array_count - confirm callers never pass a size larger
 * than one descriptor.
 * NOTE(review): size is printed with %lu; if it is a size_t, %zu is the
 * matching specifier - confirm.
 */
510 int pvfs_bufmap_copy_from_iovec(struct pvfs2_bufmap *bufmap,
511 struct iov_iter *iter,
515 struct pvfs_bufmap_desc *to;
520 gossip_debug(GOSSIP_BUFMAP_DEBUG,
521 "%s: buffer_index:%d: size:%lu:\n",
522 __func__, buffer_index, size);
524 to = &bufmap->desc_array[buffer_index];
526 for (i = 0; size; i++) {
527 page = to->page_array[i];
528 copied = copy_page_from_iter(page, 0, PAGE_SIZE, iter);
/* stop when nothing could be copied although data is still expected */
530 if ((copied == 0) && (size))
534 return size ? -EFAULT : 0;
/*
 * pvfs_bufmap_copy_to_iovec()
 *
 * Copies data from the pages of descriptor @buffer_index into @iter, one
 * page (up to PAGE_SIZE bytes, from offset 0) at a time, until @iter has
 * no room left.
 *
 * returns 0 when @iter has been filled completely, -EFAULT otherwise
 *
 * NOTE(review): the remaining parameter and the local declarations are
 * not visible in this excerpt.
 * NOTE(review): the page index i is not bounded by the descriptor's
 * array_count - confirm callers never pass an iterator larger than one
 * descriptor.
 * NOTE(review): iov_iter_count() is printed with %lu; if it returns
 * size_t, %zu is the matching specifier - confirm.
 */
539 * Iterate through the array of pages containing the bytes from
543 int pvfs_bufmap_copy_to_iovec(struct pvfs2_bufmap *bufmap,
544 struct iov_iter *iter,
547 struct pvfs_bufmap_desc *from;
552 gossip_debug(GOSSIP_BUFMAP_DEBUG,
553 "%s: buffer_index:%d: iov_iter_count(iter):%lu:\n",
554 __func__, buffer_index, iov_iter_count(iter));
556 from = &bufmap->desc_array[buffer_index];
558 for (i = 0; iov_iter_count(iter); i++) {
559 page = from->page_array[i];
560 written = copy_page_to_iter(page, 0, PAGE_SIZE, iter);
/* stop when nothing could be written although room is still left */
561 if ((written == 0) && (iov_iter_count(iter)))
565 return iov_iter_count(iter) ? -EFAULT : 0;