Btrfs: Add file data csums back in via hooks in the extent map code
fs/btrfs/extent_map.c
1#include <linux/bitops.h>
2#include <linux/slab.h>
3#include <linux/bio.h>
4#include <linux/mm.h>
5#include <linux/gfp.h>
6#include <linux/pagemap.h>
7#include <linux/page-flags.h>
8#include <linux/module.h>
9#include <linux/spinlock.h>
10#include <linux/blkdev.h>
11#include "extent_map.h"
12
13static struct kmem_cache *extent_map_cache;
14static struct kmem_cache *extent_state_cache;
15
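/*
 * tree_entry matches the start/end/in_tree/rb_node layout shared by
 * struct extent_map and struct extent_state, which lets the rb-tree
 * helpers below treat nodes from either tree the same way.
 */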
16struct tree_entry {
17 u64 start;
18 u64 end;
19 int in_tree;
20 struct rb_node rb_node;
21};
22
23/* bits for the extent state */
24#define EXTENT_DIRTY 1
25#define EXTENT_WRITEBACK (1 << 1)
26#define EXTENT_UPTODATE (1 << 2)
27#define EXTENT_LOCKED (1 << 3)
28#define EXTENT_NEW (1 << 4)
29#define EXTENT_DELALLOC (1 << 5)
30
31#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
32
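/*
 * called once at module init/exit time to set up and tear down the slab
 * caches backing extent_map and extent_state allocations.
 */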
33void __init extent_map_init(void)
34{
35 extent_map_cache = kmem_cache_create("extent_map",
36 sizeof(struct extent_map), 0,
37 SLAB_RECLAIM_ACCOUNT |
38 SLAB_DESTROY_BY_RCU,
39 NULL);
40 extent_state_cache = kmem_cache_create("extent_state",
41 sizeof(struct extent_state), 0,
42 SLAB_RECLAIM_ACCOUNT |
43 SLAB_DESTROY_BY_RCU,
44 NULL);
45}
46
47void __exit extent_map_exit(void)
48{
49 if (extent_map_cache)
50 kmem_cache_destroy(extent_map_cache);
51 if (extent_state_cache)
52 kmem_cache_destroy(extent_state_cache);
53}
54
55void extent_map_tree_init(struct extent_map_tree *tree,
56 struct address_space *mapping, gfp_t mask)
57{
58 tree->map.rb_node = NULL;
59 tree->state.rb_node = NULL;
60 tree->ops = NULL;
61 rwlock_init(&tree->lock);
62 tree->mapping = mapping;
63}
64EXPORT_SYMBOL(extent_map_tree_init);
65
66struct extent_map *alloc_extent_map(gfp_t mask)
67{
68 struct extent_map *em;
69 em = kmem_cache_alloc(extent_map_cache, mask);
70 if (!em || IS_ERR(em))
71 return em;
72 em->in_tree = 0;
73 atomic_set(&em->refs, 1);
74 return em;
75}
76EXPORT_SYMBOL(alloc_extent_map);
77
78void free_extent_map(struct extent_map *em)
79{
80 if (atomic_dec_and_test(&em->refs)) {
81 WARN_ON(em->in_tree);
82 kmem_cache_free(extent_map_cache, em);
83 }
84}
85EXPORT_SYMBOL(free_extent_map);
86
87
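/*
 * allocate an extent_state with one reference held; the state bits,
 * in_tree flag and private value all start out cleared.
 */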
88struct extent_state *alloc_extent_state(gfp_t mask)
89{
90 struct extent_state *state;
91 state = kmem_cache_alloc(extent_state_cache, mask);
92 if (!state || IS_ERR(state))
93 return state;
94 state->state = 0;
95 state->in_tree = 0;
96 state->private = 0;
97 atomic_set(&state->refs, 1);
98 init_waitqueue_head(&state->wq);
99 return state;
100}
101EXPORT_SYMBOL(alloc_extent_state);
102
103void free_extent_state(struct extent_state *state)
104{
105 if (atomic_dec_and_test(&state->refs)) {
106 WARN_ON(state->in_tree);
107 kmem_cache_free(extent_state_cache, state);
108 }
109}
110EXPORT_SYMBOL(free_extent_state);
111
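/*
 * link 'node' into 'root' keyed by the range of its tree_entry.  If an
 * existing entry already covers 'offset', that node is returned and
 * nothing is inserted; otherwise the node is linked in and NULL returned.
 */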
112static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
113 struct rb_node *node)
114{
115 struct rb_node ** p = &root->rb_node;
116 struct rb_node * parent = NULL;
117 struct tree_entry *entry;
118
119 while(*p) {
120 parent = *p;
121 entry = rb_entry(parent, struct tree_entry, rb_node);
122
123 if (offset < entry->start)
124 p = &(*p)->rb_left;
125 else if (offset > entry->end)
126 p = &(*p)->rb_right;
127 else
128 return parent;
129 }
130
131 entry = rb_entry(node, struct tree_entry, rb_node);
132 entry->in_tree = 1;
133 rb_link_node(node, parent, p);
134 rb_insert_color(node, root);
135 return NULL;
136}
137
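/*
 * find the node in 'root' whose [start, end] range contains 'offset'.
 * When there is no exact match, NULL is returned and *prev_ret (if
 * supplied) is pointed at the first node ending at or after 'offset',
 * which is the starting point the range walkers below want.
 */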
138static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
139 struct rb_node **prev_ret)
140{
141 struct rb_node * n = root->rb_node;
142 struct rb_node *prev = NULL;
143 struct tree_entry *entry;
144 struct tree_entry *prev_entry = NULL;
145
146 while(n) {
147 entry = rb_entry(n, struct tree_entry, rb_node);
148 prev = n;
149 prev_entry = entry;
150
151 if (offset < entry->start)
152 n = n->rb_left;
153 else if (offset > entry->end)
154 n = n->rb_right;
155 else
156 return n;
157 }
158 if (!prev_ret)
159 return NULL;
160 while(prev && offset > prev_entry->end) {
161 prev = rb_next(prev);
162 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
163 }
164 *prev_ret = prev;
165 return NULL;
166}
167
168static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
169{
170 struct rb_node *prev;
171 struct rb_node *ret;
172 ret = __tree_search(root, offset, &prev);
173 if (!ret)
174 return prev;
175 return ret;
176}
177
178static int tree_delete(struct rb_root *root, u64 offset)
179{
180 struct rb_node *node;
181 struct tree_entry *entry;
182
183 node = __tree_search(root, offset, NULL);
184 if (!node)
185 return -ENOENT;
186 entry = rb_entry(node, struct tree_entry, rb_node);
187 entry->in_tree = 0;
188 rb_erase(node, root);
189 return 0;
190}
191
192/*
193 * add_extent_mapping tries a simple backward merge with existing
194 * mappings. The extent_map struct passed in will be inserted into
195 * the tree directly (no copies made, just a reference taken).
196 */
197int add_extent_mapping(struct extent_map_tree *tree,
198 struct extent_map *em)
199{
200 int ret = 0;
201 struct extent_map *prev = NULL;
202 struct rb_node *rb;
203
204 write_lock_irq(&tree->lock);
205 rb = tree_insert(&tree->map, em->end, &em->rb_node);
206 if (rb) {
207 prev = rb_entry(rb, struct extent_map, rb_node);
208 printk("found extent map %Lu %Lu on insert of %Lu %Lu\n", prev->start, prev->end, em->start, em->end);
209 ret = -EEXIST;
210 goto out;
211 }
212 atomic_inc(&em->refs);
213 if (em->start != 0) {
214 rb = rb_prev(&em->rb_node);
215 if (rb)
216 prev = rb_entry(rb, struct extent_map, rb_node);
217 if (prev && prev->end + 1 == em->start &&
218 ((em->block_start == 0 && prev->block_start == 0) ||
219 (em->block_start == prev->block_end + 1))) {
220 em->start = prev->start;
221 em->block_start = prev->block_start;
222 rb_erase(&prev->rb_node, &tree->map);
223 prev->in_tree = 0;
224 free_extent_map(prev);
225 }
226 }
227out:
228 write_unlock_irq(&tree->lock);
229 return ret;
230}
231EXPORT_SYMBOL(add_extent_mapping);
232
233/*
234 * lookup_extent_mapping returns the first extent_map struct in the
235 * tree that intersects the [start, end] (inclusive) range. There may
236 * be additional objects in the tree that intersect, so check the object
237 * returned carefully to make sure you don't need additional lookups.
238 */
239struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
240 u64 start, u64 end)
241{
242 struct extent_map *em;
243 struct rb_node *rb_node;
244
245 read_lock_irq(&tree->lock);
246 rb_node = tree_search(&tree->map, start);
247 if (!rb_node) {
248 em = NULL;
249 goto out;
250 }
251 if (IS_ERR(rb_node)) {
252 em = ERR_PTR(PTR_ERR(rb_node));
253 goto out;
254 }
255 em = rb_entry(rb_node, struct extent_map, rb_node);
256 if (em->end < start || em->start > end) {
257 em = NULL;
258 goto out;
259 }
260 atomic_inc(&em->refs);
261out:
262 read_unlock_irq(&tree->lock);
263 return em;
264}
265EXPORT_SYMBOL(lookup_extent_mapping);
266
267/*
268 * removes an extent_map struct from the tree. No reference counts are
269 * dropped, and no checks are done to see if the range is in use
270 */
271int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
272{
273 int ret;
274
275 write_lock_irq(&tree->lock);
276 ret = tree_delete(&tree->map, em->end);
277 write_unlock_irq(&tree->lock);
278 return ret;
279}
280EXPORT_SYMBOL(remove_extent_mapping);
281
282/*
283 * utility function to look for merge candidates inside a given range.
284 * Any extents with matching state are merged together into a single
285 * extent in the tree. Extents with EXTENT_IOBITS in their state field
286 * are not merged because the end_io handlers need to be able to do
287 * operations on them without sleeping (or doing allocations/splits).
288 *
289 * This should be called with the tree lock held.
290 */
291static int merge_state(struct extent_map_tree *tree,
292 struct extent_state *state)
293{
294 struct extent_state *other;
295 struct rb_node *other_node;
296
297 if (state->state & EXTENT_IOBITS)
298 return 0;
299
300 other_node = rb_prev(&state->rb_node);
301 if (other_node) {
302 other = rb_entry(other_node, struct extent_state, rb_node);
303 if (other->end == state->start - 1 &&
304 other->state == state->state) {
305 state->start = other->start;
306 other->in_tree = 0;
307 rb_erase(&other->rb_node, &tree->state);
308 free_extent_state(other);
309 }
310 }
311 other_node = rb_next(&state->rb_node);
312 if (other_node) {
313 other = rb_entry(other_node, struct extent_state, rb_node);
314 if (other->start == state->end + 1 &&
315 other->state == state->state) {
316 other->start = state->start;
317 state->in_tree = 0;
318 rb_erase(&state->rb_node, &tree->state);
319 free_extent_state(state);
320 }
321 }
322 return 0;
323}
324
325/*
326 * insert an extent_state struct into the tree. 'bits' are set on the
327 * struct before it is inserted.
328 *
329 * This may return -EEXIST if the extent is already there, in which case the
330 * state struct is freed.
331 *
332 * The tree lock is not taken internally. This is a utility function and
333 * probably isn't what you want to call (see set/clear_extent_bit).
334 */
335static int insert_state(struct extent_map_tree *tree,
336 struct extent_state *state, u64 start, u64 end,
337 int bits)
338{
339 struct rb_node *node;
340
341 if (end < start) {
342 printk("end < start %Lu %Lu\n", end, start);
343 WARN_ON(1);
344 }
345 state->state |= bits;
346 state->start = start;
347 state->end = end;
348 if ((end & 4095) == 0) {
349 printk("insert state %Lu %Lu strange end\n", start, end);
350 WARN_ON(1);
351 }
352 node = tree_insert(&tree->state, end, &state->rb_node);
353 if (node) {
354 struct extent_state *found;
355 found = rb_entry(node, struct extent_state, rb_node);
356 printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
357 free_extent_state(state);
358 return -EEXIST;
359 }
360 merge_state(tree, state);
361 return 0;
362}
363
364/*
365 * split a given extent state struct in two, inserting the preallocated
366 * struct 'prealloc' as the newly created second half. 'split' indicates an
367 * offset inside 'orig' where it should be split.
368 *
369 * Before calling,
370 * the tree has 'orig' at [orig->start, orig->end]. After calling, there
371 * are two extent state structs in the tree:
372 * prealloc: [orig->start, split - 1]
373 * orig: [ split, orig->end ]
374 *
375 * The tree locks are not taken by this function. They need to be held
376 * by the caller.
377 */
378static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
379 struct extent_state *prealloc, u64 split)
380{
381 struct rb_node *node;
382 prealloc->start = orig->start;
383 prealloc->end = split - 1;
384 prealloc->state = orig->state;
385 orig->start = split;
386 if ((prealloc->end & 4095) == 0) {
387 printk("insert state %Lu %Lu strange end\n", prealloc->start,
388 prealloc->end);
389 WARN_ON(1);
390 }
391 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
392 if (node) {
393 struct extent_state *found;
394 found = rb_entry(node, struct extent_state, rb_node);
395 printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
396 free_extent_state(prealloc);
397 return -EEXIST;
398 }
399 return 0;
400}
401
402/*
403 * utility function to clear some bits in an extent state struct.
404 * it will optionally wake up any one waiting on this state (wake == 1), or
405 * forcibly remove the state from the tree (delete == 1).
406 *
407 * If no bits are set on the state struct after clearing things, the
408 * struct is freed and removed from the tree
409 */
410static int clear_state_bit(struct extent_map_tree *tree,
411 struct extent_state *state, int bits, int wake,
412 int delete)
413{
414 int ret = state->state & bits;
415 state->state &= ~bits;
416 if (wake)
417 wake_up(&state->wq);
418 if (delete || state->state == 0) {
419 if (state->in_tree) {
420 rb_erase(&state->rb_node, &tree->state);
421 state->in_tree = 0;
422 free_extent_state(state);
423 } else {
424 WARN_ON(1);
425 }
426 } else {
427 merge_state(tree, state);
428 }
429 return ret;
430}
431
432/*
433 * clear some bits on a range in the tree. This may require splitting
434 * or inserting elements in the tree, so the gfp mask is used to
435 * indicate which allocations or sleeping are allowed.
436 *
437 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
438 * the given range from the tree regardless of state (i.e. for truncate).
439 *
440 * the range [start, end] is inclusive.
441 *
442 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
443 * bits were already set, or zero if none of the bits were already set.
444 */
445int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
446 int bits, int wake, int delete, gfp_t mask)
447{
448 struct extent_state *state;
449 struct extent_state *prealloc = NULL;
450 struct rb_node *node;
451 int err;
452 int set = 0;
453
454again:
455 if (!prealloc && (mask & __GFP_WAIT)) {
456 prealloc = alloc_extent_state(mask);
457 if (!prealloc)
458 return -ENOMEM;
459 }
460
461 write_lock_irq(&tree->lock);
462 /*
463 * this search will find the extents that end after
464 * our range starts
465 */
466 node = tree_search(&tree->state, start);
467 if (!node)
468 goto out;
469 state = rb_entry(node, struct extent_state, rb_node);
470 if (state->start > end)
471 goto out;
472 WARN_ON(state->end < start);
473
474 /*
475 * | ---- desired range ---- |
476 * | state | or
477 * | ------------- state -------------- |
478 *
479 * We need to split the extent we found, and may flip
480 * bits on second half.
481 *
482 * If the extent we found extends past our range, we
483 * just split and search again. It'll get split again
484 * the next time though.
485 *
486 * If the extent we found is inside our range, we clear
487 * the desired bit on it.
488 */
489
490 if (state->start < start) {
491 err = split_state(tree, state, prealloc, start);
492 BUG_ON(err == -EEXIST);
493 prealloc = NULL;
494 if (err)
495 goto out;
496 if (state->end <= end) {
497 start = state->end + 1;
498 set |= clear_state_bit(tree, state, bits,
499 wake, delete);
500 } else {
501 start = state->start;
502 }
503 goto search_again;
504 }
505 /*
506 * | ---- desired range ---- |
507 * | state |
508 * We need to split the extent, and clear the bit
509 * on the first half
510 */
511 if (state->start <= end && state->end > end) {
512 err = split_state(tree, state, prealloc, end + 1);
513 BUG_ON(err == -EEXIST);
514
515 if (wake)
516 wake_up(&state->wq);
517 set |= clear_state_bit(tree, prealloc, bits,
518 wake, delete);
519 prealloc = NULL;
520 goto out;
521 }
522
523 start = state->end + 1;
524 set |= clear_state_bit(tree, state, bits, wake, delete);
525 goto search_again;
526
527out:
528 write_unlock_irq(&tree->lock);
529 if (prealloc)
530 free_extent_state(prealloc);
531
532 return set;
533
534search_again:
535 if (start >= end)
536 goto out;
537 write_unlock_irq(&tree->lock);
538 if (mask & __GFP_WAIT)
539 cond_resched();
540 goto again;
541}
542EXPORT_SYMBOL(clear_extent_bit);
543
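/*
 * sleep until this state is woken (see the wake_up in clear_state_bit).
 * The tree lock is dropped across the schedule() and retaken afterwards,
 * so callers must hold a reference on 'state' and revalidate on return.
 */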
544static int wait_on_state(struct extent_map_tree *tree,
545 struct extent_state *state)
546{
547 DEFINE_WAIT(wait);
548 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
549 read_unlock_irq(&tree->lock);
550 schedule();
551 read_lock_irq(&tree->lock);
552 finish_wait(&state->wq, &wait);
553 return 0;
554}
555
556/*
557 * waits for one or more bits to clear on a range in the state tree.
558 * The range [start, end] is inclusive.
559 * The tree lock is taken by this function
560 */
561int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
562{
563 struct extent_state *state;
564 struct rb_node *node;
565
566 read_lock_irq(&tree->lock);
567again:
568 while (1) {
569 /*
570 * this search will find all the extents that end after
571 * our range starts
572 */
573 node = tree_search(&tree->state, start);
574 if (!node)
575 break;
576
577 state = rb_entry(node, struct extent_state, rb_node);
578
579 if (state->start > end)
580 goto out;
581
582 if (state->state & bits) {
583 start = state->start;
584 atomic_inc(&state->refs);
585 wait_on_state(tree, state);
586 free_extent_state(state);
587 goto again;
588 }
589 start = state->end + 1;
590
591 if (start > end)
592 break;
593
594 if (need_resched()) {
595 read_unlock_irq(&tree->lock);
596 cond_resched();
597 read_lock_irq(&tree->lock);
598 }
599 }
600out:
601 read_unlock_irq(&tree->lock);
602 return 0;
603}
604EXPORT_SYMBOL(wait_extent_bit);
605
606/*
607 * set some bits on a range in the tree. This may require allocations
608 * or sleeping, so the gfp mask is used to indicate what is allowed.
609 *
610 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
611 * range already has the desired bits set. The start of the existing
612 * range is returned in failed_start in this case.
613 *
614 * [start, end] is inclusive
615 * This takes the tree lock.
616 */
617int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
618 int exclusive, u64 *failed_start, gfp_t mask)
619{
620 struct extent_state *state;
621 struct extent_state *prealloc = NULL;
622 struct rb_node *node;
623 int err = 0;
624 int set;
625 u64 last_start;
626 u64 last_end;
627again:
628 if (!prealloc && (mask & __GFP_WAIT)) {
629 prealloc = alloc_extent_state(mask);
630 if (!prealloc)
631 return -ENOMEM;
632 }
633
634 write_lock_irq(&tree->lock);
635 /*
636 * this search will find all the extents that end after
637 * our range starts.
638 */
639 node = tree_search(&tree->state, start);
640 if (!node) {
641 err = insert_state(tree, prealloc, start, end, bits);
642 prealloc = NULL;
643 BUG_ON(err == -EEXIST);
644 goto out;
645 }
646
647 state = rb_entry(node, struct extent_state, rb_node);
648 last_start = state->start;
649 last_end = state->end;
650
651 /*
652 * | ---- desired range ---- |
653 * | state |
654 *
655 * Just lock what we found and keep going
656 */
657 if (state->start == start && state->end <= end) {
658 set = state->state & bits;
659 if (set && exclusive) {
660 *failed_start = state->start;
661 err = -EEXIST;
662 goto out;
663 }
664 state->state |= bits;
665 start = state->end + 1;
666 merge_state(tree, state);
667 goto search_again;
668 }
669
670 /*
671 * | ---- desired range ---- |
672 * | state |
673 * or
674 * | ------------- state -------------- |
675 *
676 * We need to split the extent we found, and may flip bits on
677 * second half.
678 *
679 * If the extent we found extends past our
680 * range, we just split and search again. It'll get split
681 * again the next time though.
682 *
683 * If the extent we found is inside our range, we set the
684 * desired bit on it.
685 */
686 if (state->start < start) {
687 set = state->state & bits;
688 if (exclusive && set) {
689 *failed_start = start;
690 err = -EEXIST;
691 goto out;
692 }
693 err = split_state(tree, state, prealloc, start);
694 BUG_ON(err == -EEXIST);
695 prealloc = NULL;
696 if (err)
697 goto out;
698 if (state->end <= end) {
699 state->state |= bits;
700 start = state->end + 1;
701 merge_state(tree, state);
702 } else {
703 start = state->start;
704 }
705 goto search_again;
706 }
707 /*
708 * | ---- desired range ---- |
709 * | state |
710 * We need to split the extent, and set the bit
711 * on the first half
712 */
713 if (state->start <= end && state->end > end) {
714 set = state->state & bits;
715 if (exclusive && set) {
716 *failed_start = start;
717 err = -EEXIST;
718 goto out;
719 }
720 err = split_state(tree, state, prealloc, end + 1);
721 BUG_ON(err == -EEXIST);
722
723 prealloc->state |= bits;
724 merge_state(tree, prealloc);
725 prealloc = NULL;
726 goto out;
727 }
728
729 /*
730 * | ---- desired range ---- |
731 * | state | or | state |
732 *
733 * There's a hole, we need to insert something in it and
734 * ignore the extent we found.
735 */
736 if (state->start > start) {
737 u64 this_end;
738 if (end < last_start)
739 this_end = end;
740 else
741 this_end = last_start -1;
742 err = insert_state(tree, prealloc, start, this_end,
743 bits);
744 prealloc = NULL;
745 BUG_ON(err == -EEXIST);
746 if (err)
747 goto out;
748 start = this_end + 1;
749 goto search_again;
750 }
751 goto search_again;
752
753out:
754 write_unlock_irq(&tree->lock);
755 if (prealloc)
756 free_extent_state(prealloc);
757
758 return err;
759
760search_again:
761 if (start > end)
762 goto out;
763 write_unlock_irq(&tree->lock);
764 if (mask & __GFP_WAIT)
765 cond_resched();
766 goto again;
767}
768EXPORT_SYMBOL(set_extent_bit);
769
770/* wrappers around set/clear extent bit */
771int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
772 gfp_t mask)
773{
774 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
775 mask);
776}
777EXPORT_SYMBOL(set_extent_dirty);
778
779int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
780 gfp_t mask)
781{
782 return set_extent_bit(tree, start, end,
783 EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
784 mask);
785}
786EXPORT_SYMBOL(set_extent_delalloc);
787
788int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
789 gfp_t mask)
790{
791 return clear_extent_bit(tree, start, end,
792 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
793}
794EXPORT_SYMBOL(clear_extent_dirty);
795
796int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
797 gfp_t mask)
798{
799 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
800 mask);
801}
802EXPORT_SYMBOL(set_extent_new);
803
804int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
805 gfp_t mask)
806{
807 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
808}
809EXPORT_SYMBOL(clear_extent_new);
810
811int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
812 gfp_t mask)
813{
814 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
815 mask);
816}
817EXPORT_SYMBOL(set_extent_uptodate);
818
819int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
820 gfp_t mask)
821{
822 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
823}
824EXPORT_SYMBOL(clear_extent_uptodate);
825
826int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
827 gfp_t mask)
828{
829 return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
830 0, NULL, mask);
831}
832EXPORT_SYMBOL(set_extent_writeback);
833
834int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
835 gfp_t mask)
836{
837 return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
838}
839EXPORT_SYMBOL(clear_extent_writeback);
840
841int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
842{
843 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
844}
845EXPORT_SYMBOL(wait_on_extent_writeback);
846
847/*
848 * locks a range in ascending order, waiting for any locked regions
849 * it hits on the way. [start,end] are inclusive, and this will sleep.
850 */
851int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
852{
853 int err;
854 u64 failed_start;
855 while (1) {
856 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
857 &failed_start, mask);
858 if (err == -EEXIST && (mask & __GFP_WAIT)) {
859 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
860 start = failed_start;
861 } else {
862 break;
863 }
864 WARN_ON(start > end);
865 }
866 return err;
867}
868EXPORT_SYMBOL(lock_extent);
869
870int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
871 gfp_t mask)
872{
873 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
874}
875EXPORT_SYMBOL(unlock_extent);
876
877/*
878 * helper function to set pages and extents in the tree dirty
879 */
880int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
881{
882 unsigned long index = start >> PAGE_CACHE_SHIFT;
883 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
884 struct page *page;
885
886 while (index <= end_index) {
887 page = find_get_page(tree->mapping, index);
888 BUG_ON(!page);
889 __set_page_dirty_nobuffers(page);
890 page_cache_release(page);
891 index++;
892 }
893 set_extent_dirty(tree, start, end, GFP_NOFS);
894 return 0;
895}
896EXPORT_SYMBOL(set_range_dirty);
897
898/*
899 * helper function to set both pages and extents in the tree writeback
900 */
901int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
902{
903 unsigned long index = start >> PAGE_CACHE_SHIFT;
904 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
905 struct page *page;
906
907 while (index <= end_index) {
908 page = find_get_page(tree->mapping, index);
909 BUG_ON(!page);
910 set_page_writeback(page);
911 page_cache_release(page);
912 index++;
913 }
914 set_extent_writeback(tree, start, end, GFP_NOFS);
915 return 0;
916}
917EXPORT_SYMBOL(set_range_writeback);
918
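/*
 * walk forward from 'start' across contiguous EXTENT_DELALLOC extents,
 * setting EXTENT_LOCKED on those at or beyond 'lock_start' and sleeping
 * if another holder already has them locked.  Returns the number of
 * extents found, records the end of the last one in *end, and stops once
 * roughly max_bytes worth of range has been covered.
 */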
919u64 find_lock_delalloc_range(struct extent_map_tree *tree,
920 u64 start, u64 lock_start, u64 *end, u64 max_bytes)
921{
922 struct rb_node *node;
923 struct extent_state *state;
924 u64 cur_start = start;
925 u64 found = 0;
926 u64 total_bytes = 0;
927
928 write_lock_irq(&tree->lock);
929 /*
930 * this search will find all the extents that end after
931 * our range starts.
932 */
933search_again:
934 node = tree_search(&tree->state, cur_start);
935 if (!node || IS_ERR(node)) {
936 goto out;
937 }
938
939 while(1) {
940 state = rb_entry(node, struct extent_state, rb_node);
941 if (state->start != cur_start) {
942 goto out;
943 }
944 if (!(state->state & EXTENT_DELALLOC)) {
945 goto out;
946 }
947 if (state->start >= lock_start) {
948 if (state->state & EXTENT_LOCKED) {
949 DEFINE_WAIT(wait);
950 atomic_inc(&state->refs);
951 write_unlock_irq(&tree->lock);
952 schedule();
953 write_lock_irq(&tree->lock);
954 finish_wait(&state->wq, &wait);
955 free_extent_state(state);
956 goto search_again;
957 }
958 state->state |= EXTENT_LOCKED;
959 }
960 found++;
961 *end = state->end;
962 cur_start = state->end + 1;
963 node = rb_next(node);
964 if (!node)
965 break;
966 total_bytes = state->end - state->start + 1;
967 if (total_bytes >= max_bytes)
968 break;
969 }
970out:
971 write_unlock_irq(&tree->lock);
972 return found;
973}
974
975/*
976 * helper function to lock both pages and extents in the tree.
977 * pages must be locked first.
978 */
979int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
980{
981 unsigned long index = start >> PAGE_CACHE_SHIFT;
982 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
983 struct page *page;
984 int err;
985
986 while (index <= end_index) {
987 page = grab_cache_page(tree->mapping, index);
988 if (!page) {
989 err = -ENOMEM;
990 goto failed;
991 }
992 if (IS_ERR(page)) {
993 err = PTR_ERR(page);
994 goto failed;
995 }
996 index++;
997 }
998 lock_extent(tree, start, end, GFP_NOFS);
999 return 0;
1000
1001failed:
1002 /*
1003 * we failed above in getting the page at 'index', so we undo here
1004 * up to but not including the page at 'index'
1005 */
1006 end_index = index;
1007 index = start >> PAGE_CACHE_SHIFT;
1008 while (index < end_index) {
1009 page = find_get_page(tree->mapping, index);
1010 unlock_page(page);
1011 page_cache_release(page);
1012 index++;
1013 }
1014 return err;
1015}
1016EXPORT_SYMBOL(lock_range);
1017
1018/*
1019 * helper function to unlock both pages and extents in the tree.
1020 */
1021int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
1022{
1023 unsigned long index = start >> PAGE_CACHE_SHIFT;
1024 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1025 struct page *page;
1026
1027 while (index <= end_index) {
1028 page = find_get_page(tree->mapping, index);
1029 unlock_page(page);
1030 page_cache_release(page);
1031 index++;
1032 }
1033 unlock_extent(tree, start, end, GFP_NOFS);
1034 return 0;
1035}
1036EXPORT_SYMBOL(unlock_range);
1037
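/*
 * set_state_private/get_state_private stash and fetch a caller supplied
 * u64 on the extent_state that starts exactly at 'start'.  The file data
 * csum hooks added with this change use it to carry per-range checksums.
 */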
1038int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
1039{
1040 struct rb_node *node;
1041 struct extent_state *state;
1042 int ret = 0;
1043
1044 write_lock_irq(&tree->lock);
1045 /*
1046 * this search will find all the extents that end after
1047 * our range starts.
1048 */
1049 node = tree_search(&tree->state, start);
1050 if (!node || IS_ERR(node)) {
1051 ret = -ENOENT;
1052 goto out;
1053 }
1054 state = rb_entry(node, struct extent_state, rb_node);
1055 if (state->start != start) {
1056 ret = -ENOENT;
1057 goto out;
1058 }
1059 state->private = private;
1060out:
1061 write_unlock_irq(&tree->lock);
1062 return ret;
1063
1064}
1065
1066int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
1067{
1068 struct rb_node *node;
1069 struct extent_state *state;
1070 int ret = 0;
1071
1072 read_lock_irq(&tree->lock);
1073 /*
1074 * this search will find all the extents that end after
1075 * our range starts.
1076 */
1077 node = tree_search(&tree->state, start);
1078 if (!node || IS_ERR(node)) {
1079 ret = -ENOENT;
1080 goto out;
1081 }
1082 state = rb_entry(node, struct extent_state, rb_node);
1083 if (state->start != start) {
1084 ret = -ENOENT;
1085 goto out;
1086 }
1087 *private = state->private;
1088out:
1089 read_unlock_irq(&tree->lock);
1090 return ret;
1091}
1092
1093/*
1094 * searches a range in the state tree for a given mask.
1095 * If 'filled' == 1, this returns 1 only if every extent in the range
1096 * has the bits set. Otherwise, 1 is returned if any bit in the
1097 * range is found set.
1098 */
1099static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
1100 int bits, int filled)
1101{
1102 struct extent_state *state = NULL;
1103 struct rb_node *node;
1104 int bitset = 0;
1105
1106 read_lock_irq(&tree->lock);
1107 node = tree_search(&tree->state, start);
1108 while (node && start <= end) {
1109 state = rb_entry(node, struct extent_state, rb_node);
1110 if (state->start > end)
1111 break;
1112
1113 if (filled && state->start > start) {
1114 bitset = 0;
1115 break;
1116 }
1117 if (state->state & bits) {
1118 bitset = 1;
1119 if (!filled)
1120 break;
1121 } else if (filled) {
1122 bitset = 0;
1123 break;
1124 }
1125 start = state->end + 1;
1126 if (start > end)
1127 break;
1128 node = rb_next(node);
1129 }
1130 read_unlock_irq(&tree->lock);
1131 return bitset;
1132}
1133
1134/*
1135 * helper function to set a given page up to date if all the
1136 * extents in the tree for that page are up to date
1137 */
1138static int check_page_uptodate(struct extent_map_tree *tree,
1139 struct page *page)
1140{
1141 u64 start = page->index << PAGE_CACHE_SHIFT;
1142 u64 end = start + PAGE_CACHE_SIZE - 1;
1143 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
1144 SetPageUptodate(page);
1145 return 0;
1146}
1147
1148/*
1149 * helper function to unlock a page if all the extents in the tree
1150 * for that page are unlocked
1151 */
1152static int check_page_locked(struct extent_map_tree *tree,
1153 struct page *page)
1154{
1155 u64 start = page->index << PAGE_CACHE_SHIFT;
1156 u64 end = start + PAGE_CACHE_SIZE - 1;
1157 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
1158 unlock_page(page);
1159 return 0;
1160}
1161
1162/*
1163 * helper function to end page writeback if all the extents
1164 * in the tree for that page are done with writeback
1165 */
1166static int check_page_writeback(struct extent_map_tree *tree,
1167 struct page *page)
1168{
1169 u64 start = page->index << PAGE_CACHE_SHIFT;
1170 u64 end = start + PAGE_CACHE_SIZE - 1;
1171 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1172 end_page_writeback(page);
1173 return 0;
1174}
1175
1176/* lots and lots of room for performance fixes in the end_bio funcs */
1177
1178/*
1179 * after a writepage IO is done, we need to:
1180 * clear the uptodate bits on error
1181 * clear the writeback bits in the extent tree for this IO
1182 * end_page_writeback if the page has no more pending IO
1183 *
1184 * Scheduling is not allowed, so the extent state tree is expected
1185 * to have one and only one object corresponding to this IO.
1186 */
1187static int end_bio_extent_writepage(struct bio *bio,
1188 unsigned int bytes_done, int err)
1189{
1190 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1191 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1192 struct extent_map_tree *tree = bio->bi_private;
1193 u64 start;
1194 u64 end;
1195 int whole_page;
1196
1197 if (bio->bi_size)
1198 return 1;
1199
1200 do {
1201 struct page *page = bvec->bv_page;
1202 start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1203 end = start + bvec->bv_len - 1;
1204
1205 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1206 whole_page = 1;
1207 else
1208 whole_page = 0;
1209
1210 if (--bvec >= bio->bi_io_vec)
1211 prefetchw(&bvec->bv_page->flags);
1212
1213 if (!uptodate) {
1214 clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
1215 ClearPageUptodate(page);
1216 SetPageError(page);
1217 }
1218 clear_extent_writeback(tree, start, end, GFP_ATOMIC);
1219
1220 if (whole_page)
1221 end_page_writeback(page);
1222 else
1223 check_page_writeback(tree, page);
1224 } while (bvec >= bio->bi_io_vec);
1225
1226 bio_put(bio);
1227 return 0;
1228}
1229
1230/*
1231 * after a readpage IO is done, we need to:
1232 * clear the uptodate bits on error
1233 * set the uptodate bits if things worked
1234 * set the page up to date if all extents in the tree are uptodate
1235 * clear the lock bit in the extent tree
1236 * unlock the page if there are no other extents locked for it
1237 *
1238 * Scheduling is not allowed, so the extent state tree is expected
1239 * to have one and only one object corresponding to this IO.
1240 */
1241static int end_bio_extent_readpage(struct bio *bio,
1242 unsigned int bytes_done, int err)
1243{
1244 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1245 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1246 struct extent_map_tree *tree = bio->bi_private;
1247 u64 start;
1248 u64 end;
1249 int whole_page;
1250 int ret;
1251
1252 if (bio->bi_size)
1253 return 1;
1254
1255 do {
1256 struct page *page = bvec->bv_page;
1257 start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1258 end = start + bvec->bv_len - 1;
1259
1260 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1261 whole_page = 1;
1262 else
1263 whole_page = 0;
1264
1265 if (--bvec >= bio->bi_io_vec)
1266 prefetchw(&bvec->bv_page->flags);
1267
1268 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
1269 ret = tree->ops->readpage_end_io_hook(page, start, end);
1270 if (ret)
1271 uptodate = 0;
1272 }
1273 if (uptodate) {
1274 set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1275 if (whole_page)
1276 SetPageUptodate(page);
1277 else
1278 check_page_uptodate(tree, page);
1279 } else {
1280 ClearPageUptodate(page);
1281 SetPageError(page);
1282 }
1283
1284 unlock_extent(tree, start, end, GFP_ATOMIC);
1285
1286 if (whole_page)
1287 unlock_page(page);
1288 else
1289 check_page_locked(tree, page);
1290 } while (bvec >= bio->bi_io_vec);
1291
1292 bio_put(bio);
1293 return 0;
1294}
1295
1296/*
1297 * IO done from prepare_write is pretty simple, we just unlock
1298 * the structs in the extent tree when done, and set the uptodate bits
1299 * as appropriate.
1300 */
1301static int end_bio_extent_preparewrite(struct bio *bio,
1302 unsigned int bytes_done, int err)
1303{
1304 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1305 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1306 struct extent_map_tree *tree = bio->bi_private;
1307 u64 start;
1308 u64 end;
1309
1310 if (bio->bi_size)
1311 return 1;
1312
1313 do {
1314 struct page *page = bvec->bv_page;
1315 start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1316 end = start + bvec->bv_len - 1;
1317
1318 if (--bvec >= bio->bi_io_vec)
1319 prefetchw(&bvec->bv_page->flags);
1320
1321 if (uptodate) {
1322 set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1323 } else {
1324 ClearPageUptodate(page);
1325 SetPageError(page);
1326 }
1327
1328 unlock_extent(tree, start, end, GFP_ATOMIC);
1329
1330 } while (bvec >= bio->bi_io_vec);
1331
1332 bio_put(bio);
1333 return 0;
1334}
1335
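/*
 * build a one page (single bio_vec) bio for 'sector' and submit it.  The
 * extent tree is passed via bi_private so the end_io handlers above can
 * find the ranges to unlock or mark uptodate.
 */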
1336static int submit_extent_page(int rw, struct extent_map_tree *tree,
1337 struct page *page, sector_t sector,
1338 size_t size, unsigned long offset,
1339 struct block_device *bdev,
1340 bio_end_io_t end_io_func)
1341{
1342 struct bio *bio;
1343 int ret = 0;
1344
1345 bio = bio_alloc(GFP_NOIO, 1);
1346
1347 bio->bi_sector = sector;
1348 bio->bi_bdev = bdev;
1349 bio->bi_io_vec[0].bv_page = page;
1350 bio->bi_io_vec[0].bv_len = size;
1351 bio->bi_io_vec[0].bv_offset = offset;
1352
1353 bio->bi_vcnt = 1;
1354 bio->bi_idx = 0;
1355 bio->bi_size = size;
1356
1357 bio->bi_end_io = end_io_func;
1358 bio->bi_private = tree;
1359
1360 bio_get(bio);
1361 submit_bio(rw, bio);
1362
1363 if (bio_flagged(bio, BIO_EOPNOTSUPP))
1364 ret = -EOPNOTSUPP;
1365
1366 bio_put(bio);
1367 return ret;
1368}
1369
1370/*
1371 * basic readpage implementation. Locked extent state structs are inserted
1372 * into the tree and removed again when the IO completes (by the end_io
1373 * handlers)
1374 */
1375int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
1376 get_extent_t *get_extent)
1377{
1378 struct inode *inode = page->mapping->host;
1379 u64 start = page->index << PAGE_CACHE_SHIFT;
1380 u64 page_end = start + PAGE_CACHE_SIZE - 1;
1381 u64 end;
1382 u64 cur = start;
1383 u64 extent_offset;
1384 u64 last_byte = i_size_read(inode);
1385 u64 block_start;
1386 u64 cur_end;
1387 sector_t sector;
1388 struct extent_map *em;
1389 struct block_device *bdev;
1390 int ret;
1391 int nr = 0;
1392 size_t page_offset = 0;
1393 size_t iosize;
1394 size_t blocksize = inode->i_sb->s_blocksize;
1395
1396 if (!PagePrivate(page)) {
1397 SetPagePrivate(page);
1398 set_page_private(page, 1);
1399 WARN_ON(!page->mapping->a_ops->invalidatepage);
1400 page_cache_get(page);
1401 }
1402
1403 end = page_end;
1404 lock_extent(tree, start, end, GFP_NOFS);
1405
1406 while (cur <= end) {
1407 if (cur >= last_byte) {
1408 iosize = PAGE_CACHE_SIZE - page_offset;
1409 zero_user_page(page, page_offset, iosize, KM_USER0);
1410 set_extent_uptodate(tree, cur, cur + iosize - 1,
1411 GFP_NOFS);
1412 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1413 break;
1414 }
1415 em = get_extent(inode, page, page_offset, cur, end, 0);
1416 if (IS_ERR(em) || !em) {
1417 SetPageError(page);
1418 unlock_extent(tree, cur, end, GFP_NOFS);
1419 break;
1420 }
1421
1422 extent_offset = cur - em->start;
1423 BUG_ON(em->end < cur);
1424 BUG_ON(end < cur);
1425
1426 iosize = min(em->end - cur, end - cur) + 1;
1427 cur_end = min(em->end, end);
1428 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1429 sector = (em->block_start + extent_offset) >> 9;
1430 bdev = em->bdev;
1431 block_start = em->block_start;
1432 free_extent_map(em);
1433 em = NULL;
1434
1435 /* we've found a hole, just zero and go on */
1436 if (block_start == 0) {
1437 zero_user_page(page, page_offset, iosize, KM_USER0);
1438 set_extent_uptodate(tree, cur, cur + iosize - 1,
1439 GFP_NOFS);
1440 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1441 cur = cur + iosize;
1442 page_offset += iosize;
1443 continue;
1444 }
1445 /* the get_extent function already copied into the page */
1446 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
1447 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1448 cur = cur + iosize;
1449 page_offset += iosize;
1450 continue;
1451 }
1452
1453 ret = 0;
1454 if (tree->ops && tree->ops->readpage_io_hook) {
1455 ret = tree->ops->readpage_io_hook(page, cur,
1456 cur + iosize - 1);
1457 }
1458 if (!ret) {
1459 ret = submit_extent_page(READ, tree, page,
1460 sector, iosize, page_offset,
1461 bdev, end_bio_extent_readpage);
1462 }
1463 if (ret)
1464 SetPageError(page);
1465 cur = cur + iosize;
1466 page_offset += iosize;
1467 nr++;
1468 }
1469 if (!nr) {
1470 if (!PageError(page))
1471 SetPageUptodate(page);
1472 unlock_page(page);
1473 }
1474 return 0;
1475}
1476EXPORT_SYMBOL(extent_read_full_page);
1477
1478/*
1479 * the writepage semantics are similar to regular writepage. extent
1480 * records are inserted to lock ranges in the tree, and as dirty areas
1481 * are found, they are marked writeback. Then the lock bits are removed
1482 * and the end_io handler clears the writeback ranges
1483 */
1484int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
1485 get_extent_t *get_extent,
1486 struct writeback_control *wbc)
1487{
1488 struct inode *inode = page->mapping->host;
1489 u64 start = page->index << PAGE_CACHE_SHIFT;
1490 u64 page_end = start + PAGE_CACHE_SIZE - 1;
1491 u64 end;
1492 u64 cur = start;
1493 u64 extent_offset;
1494 u64 last_byte = i_size_read(inode);
1495 u64 block_start;
1496 sector_t sector;
1497 struct extent_map *em;
1498 struct block_device *bdev;
1499 int ret;
1500 int nr = 0;
1501 size_t page_offset = 0;
1502 size_t iosize;
1503 size_t blocksize;
1504 loff_t i_size = i_size_read(inode);
1505 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
1506 u64 nr_delalloc;
1507 u64 delalloc_end;
1508
1509 WARN_ON(!PageLocked(page));
1510 if (page->index > end_index) {
1511 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
1512 unlock_page(page);
1513 return 0;
1514 }
1515
1516 if (page->index == end_index) {
1517 size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
1518 zero_user_page(page, offset,
1519 PAGE_CACHE_SIZE - offset, KM_USER0);
1520 }
1521
1522 if (!PagePrivate(page)) {
1523 SetPagePrivate(page);
1524 set_page_private(page, 1);
1525 WARN_ON(!page->mapping->a_ops->invalidatepage);
1526 page_cache_get(page);
1527 }
1528
1529 lock_extent(tree, start, page_end, GFP_NOFS);
1530 nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1,
1531 &delalloc_end,
1532 128 * 1024 * 1024);
1533 if (nr_delalloc) {
1534 tree->ops->fill_delalloc(inode, start, delalloc_end);
1535 if (delalloc_end >= page_end + 1) {
1536 clear_extent_bit(tree, page_end + 1, delalloc_end,
1537 EXTENT_LOCKED | EXTENT_DELALLOC,
1538 1, 0, GFP_NOFS);
1539 }
1540 clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC,
1541 0, 0, GFP_NOFS);
1542 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1543 printk("found delalloc bits after clear extent_bit\n");
1544 }
1545 } else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1546 printk("found delalloc bits after find_delalloc_range returns 0\n");
1547 }
1548
1549 end = page_end;
1550 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1551 printk("found delalloc bits after lock_extent\n");
1552 }
1553
1554 if (last_byte <= start) {
1555 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
1556 goto done;
1557 }
1558
1559 set_extent_uptodate(tree, start, page_end, GFP_NOFS);
1560 blocksize = inode->i_sb->s_blocksize;
1561
1562 while (cur <= end) {
1563 if (cur >= last_byte) {
1564 clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
1565 break;
1566 }
1567 em = get_extent(inode, page, page_offset, cur, end, 0);
1568 if (IS_ERR(em) || !em) {
1569 SetPageError(page);
1570 break;
1571 }
1572
1573 extent_offset = cur - em->start;
1574 BUG_ON(em->end < cur);
1575 BUG_ON(end < cur);
1576 iosize = min(em->end - cur, end - cur) + 1;
1577 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1578 sector = (em->block_start + extent_offset) >> 9;
1579 bdev = em->bdev;
1580 block_start = em->block_start;
1581 free_extent_map(em);
1582 em = NULL;
1583
1584 if (block_start == 0 || block_start == EXTENT_MAP_INLINE) {
1585 clear_extent_dirty(tree, cur,
1586 cur + iosize - 1, GFP_NOFS);
1587 cur = cur + iosize;
1588 page_offset += iosize;
1589 continue;
1590 }
1591
1592 /* leave this out until we have a page_mkwrite call */
1593 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
1594 EXTENT_DIRTY, 0)) {
1595 cur = cur + iosize;
1596 page_offset += iosize;
1597 continue;
1598 }
1599 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
1600 ret = tree->ops->writepage_io_hook(page, cur, cur + iosize - 1);
1601 if (ret)
1602 SetPageError(page);
1603 else {
1604 set_range_writeback(tree, cur, cur + iosize - 1);
1605 ret = submit_extent_page(WRITE, tree, page, sector,
1606 iosize, page_offset, bdev,
1607 end_bio_extent_writepage);
1608 if (ret)
1609 SetPageError(page);
1610 }
1611 cur = cur + iosize;
1612 page_offset += iosize;
1613 nr++;
1614 }
1615done:
1616 WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0));
1617 unlock_extent(tree, start, page_end, GFP_NOFS);
1618 unlock_page(page);
1619 return 0;
1620}
1621EXPORT_SYMBOL(extent_write_full_page);
1622
1623/*
1624 * basic invalidatepage code, this waits on any locked or writeback
1625 * ranges corresponding to the page, and then deletes any extent state
1626 * records from the tree
1627 */
1628int extent_invalidatepage(struct extent_map_tree *tree,
1629 struct page *page, unsigned long offset)
1630{
1631 u64 start = (page->index << PAGE_CACHE_SHIFT);
1632 u64 end = start + PAGE_CACHE_SIZE - 1;
1633 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
1634
1635 start += (offset + blocksize -1) & ~(blocksize - 1);
1636 if (start > end)
1637 return 0;
1638
1639 lock_extent(tree, start, end, GFP_NOFS);
1640 wait_on_extent_writeback(tree, start, end);
1641 clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY,
1642 1, 1, GFP_NOFS);
1643 return 0;
1644}
1645EXPORT_SYMBOL(extent_invalidatepage);
1646
1647/*
1648 * simple commit_write call, set_range_dirty is used to mark both
1649 * the pages and the extent records as dirty
1650 */
1651int extent_commit_write(struct extent_map_tree *tree,
1652 struct inode *inode, struct page *page,
1653 unsigned from, unsigned to)
1654{
1655 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1656
1657 if (!PagePrivate(page)) {
1658 SetPagePrivate(page);
1659 set_page_private(page, 1);
1660 WARN_ON(!page->mapping->a_ops->invalidatepage);
1661 page_cache_get(page);
1662 }
1663
1664 set_page_dirty(page);
1665
1666 if (pos > inode->i_size) {
1667 i_size_write(inode, pos);
1668 mark_inode_dirty(inode);
1669 }
1670 return 0;
1671}
1672EXPORT_SYMBOL(extent_commit_write);
1673
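/*
 * prepare_write support: for each block touched by [from, to), either
 * zero the parts of a newly allocated block that fall outside the write
 * or read the block from disk, so the page is fully valid before the
 * caller copies in user data.  Reads issued here are waited on via the
 * EXTENT_LOCKED bit before returning.
 */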
1674int extent_prepare_write(struct extent_map_tree *tree,
1675 struct inode *inode, struct page *page,
1676 unsigned from, unsigned to, get_extent_t *get_extent)
1677{
1678 u64 page_start = page->index << PAGE_CACHE_SHIFT;
1679 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
1680 u64 block_start;
1681 u64 orig_block_start;
1682 u64 block_end;
1683 u64 cur_end;
1684 struct extent_map *em;
1685 unsigned blocksize = 1 << inode->i_blkbits;
1686 size_t page_offset = 0;
1687 size_t block_off_start;
1688 size_t block_off_end;
1689 int err = 0;
1690 int iocount = 0;
1691 int ret = 0;
1692 int isnew;
1693
1694 if (!PagePrivate(page)) {
1695 SetPagePrivate(page);
1696 set_page_private(page, 1);
1697 WARN_ON(!page->mapping->a_ops->invalidatepage);
1698 page_cache_get(page);
1699 }
1700 block_start = (page_start + from) & ~((u64)blocksize - 1);
1701 block_end = (page_start + to - 1) | (blocksize - 1);
1702 orig_block_start = block_start;
1703
1704 lock_extent(tree, page_start, page_end, GFP_NOFS);
1705 while(block_start <= block_end) {
1706 em = get_extent(inode, page, page_offset, block_start,
1707 block_end, 1);
1708 if (IS_ERR(em) || !em) {
1709 goto err;
1710 }
1711 cur_end = min(block_end, em->end);
1712 block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
1713 block_off_end = block_off_start + blocksize;
1714 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
1715
1716 if (!PageUptodate(page) && isnew &&
1717 (block_off_end > to || block_off_start < from)) {
1718 void *kaddr;
1719
1720 kaddr = kmap_atomic(page, KM_USER0);
1721 if (block_off_end > to)
1722 memset(kaddr + to, 0, block_off_end - to);
1723 if (block_off_start < from)
1724 memset(kaddr + block_off_start, 0,
1725 from - block_off_start);
1726 flush_dcache_page(page);
1727 kunmap_atomic(kaddr, KM_USER0);
1728 }
1729 if (!isnew && !PageUptodate(page) &&
1730 (block_off_end > to || block_off_start < from) &&
1731 !test_range_bit(tree, block_start, cur_end,
1732 EXTENT_UPTODATE, 1)) {
1733 u64 sector;
1734 u64 extent_offset = block_start - em->start;
1735 size_t iosize;
1736 sector = (em->block_start + extent_offset) >> 9;
1737 iosize = (cur_end - block_start + blocksize - 1) &
1738 ~((u64)blocksize - 1);
1739 /*
1740 * we've already got the extent locked, but we
1741 * need to split the state such that our end_bio
1742 * handler can clear the lock.
1743 */
1744 set_extent_bit(tree, block_start,
1745 block_start + iosize - 1,
1746 EXTENT_LOCKED, 0, NULL, GFP_NOFS);
1747 ret = submit_extent_page(READ, tree, page,
1748 sector, iosize, page_offset, em->bdev,
1749 end_bio_extent_preparewrite);
1750 iocount++;
1751 block_start = block_start + iosize;
1752 } else {
1753 set_extent_uptodate(tree, block_start, cur_end,
1754 GFP_NOFS);
1755 unlock_extent(tree, block_start, cur_end, GFP_NOFS);
1756 block_start = cur_end + 1;
1757 }
1758 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
1759 free_extent_map(em);
1760 }
1761 if (iocount) {
1762 wait_extent_bit(tree, orig_block_start,
1763 block_end, EXTENT_LOCKED);
1764 }
1765 check_page_uptodate(tree, page);
1766err:
1767 /* FIXME, zero out newly allocated blocks on error */
1768 return err;
1769}
1770EXPORT_SYMBOL(extent_prepare_write);
1771
1772/*
1773 * a helper for releasepage. As long as there are no locked extents
1774 * in the range corresponding to the page, both state records and extent
1775 * map records are removed
1776 */
1777int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
1778{
1779 struct extent_map *em;
1780 u64 start = page->index << PAGE_CACHE_SHIFT;
1781 u64 end = start + PAGE_CACHE_SIZE - 1;
1782 u64 orig_start = start;
1783 int ret = 1;
1784
1785 while (start <= end) {
1786 em = lookup_extent_mapping(tree, start, end);
1787 if (!em || IS_ERR(em))
1788 break;
1789 if (!test_range_bit(tree, em->start, em->end,
1790 EXTENT_LOCKED, 0)) {
1791 remove_extent_mapping(tree, em);
1792 /* once for the rb tree */
1793 free_extent_map(em);
1794 }
1795 start = em->end + 1;
1796 /* once for us */
1797 free_extent_map(em);
1798 }
1799 if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
1800 ret = 0;
1801 else
1802 clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
1803 1, 1, GFP_NOFS);
1804 return ret;
1805}
1806EXPORT_SYMBOL(try_release_extent_mapping);
1807