]>
Commit | Line | Data |
---|---|---|
0bd49f94 RK |
1 | /* |
2 | * page.c - buffer/page management specific to NILFS | |
3 | * | |
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | * | |
20 | * Written by Ryusuke Konishi <[email protected]>, | |
21 | * Seiji Kihara <[email protected]>. | |
22 | */ | |
23 | ||
24 | #include <linux/pagemap.h> | |
25 | #include <linux/writeback.h> | |
26 | #include <linux/swap.h> | |
27 | #include <linux/bitops.h> | |
28 | #include <linux/page-flags.h> | |
29 | #include <linux/list.h> | |
30 | #include <linux/highmem.h> | |
31 | #include <linux/pagevec.h> | |
5a0e3ad6 | 32 | #include <linux/gfp.h> |
0bd49f94 RK |
33 | #include "nilfs.h" |
34 | #include "page.h" | |
35 | #include "mdt.h" | |
36 | ||
37 | ||
/*
 * Buffer state bits that are propagated from a source buffer head to its
 * copy (see the users of this mask in nilfs_copy_buffer() and
 * nilfs_copy_page()).
 */
#define NILFS_BUFFER_INHERENT_BITS					\
	((1UL << BH_Uptodate) | (1UL << BH_Mapped) |			\
	 (1UL << BH_NILFS_Node) | (1UL << BH_NILFS_Volatile) |		\
	 (1UL << BH_NILFS_Checked))
0bd49f94 RK |
41 | |
42 | static struct buffer_head * | |
43 | __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, | |
44 | int blkbits, unsigned long b_state) | |
45 | ||
46 | { | |
47 | unsigned long first_block; | |
48 | struct buffer_head *bh; | |
49 | ||
50 | if (!page_has_buffers(page)) | |
51 | create_empty_buffers(page, 1 << blkbits, b_state); | |
52 | ||
53 | first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits); | |
54 | bh = nilfs_page_get_nth_block(page, block - first_block); | |
55 | ||
56 | touch_buffer(bh); | |
57 | wait_on_buffer(bh); | |
58 | return bh; | |
59 | } | |
60 | ||
0bd49f94 RK |
61 | struct buffer_head *nilfs_grab_buffer(struct inode *inode, |
62 | struct address_space *mapping, | |
63 | unsigned long blkoff, | |
64 | unsigned long b_state) | |
65 | { | |
66 | int blkbits = inode->i_blkbits; | |
67 | pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits); | |
c1c1d709 RK |
68 | struct page *page; |
69 | struct buffer_head *bh; | |
0bd49f94 RK |
70 | |
71 | page = grab_cache_page(mapping, index); | |
72 | if (unlikely(!page)) | |
73 | return NULL; | |
74 | ||
75 | bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state); | |
76 | if (unlikely(!bh)) { | |
77 | unlock_page(page); | |
78 | page_cache_release(page); | |
79 | return NULL; | |
80 | } | |
0bd49f94 RK |
81 | return bh; |
82 | } | |
83 | ||
84 | /** | |
85 | * nilfs_forget_buffer - discard dirty state | |
86 | * @inode: owner inode of the buffer | |
87 | * @bh: buffer head of the buffer to be discarded | |
88 | */ | |
89 | void nilfs_forget_buffer(struct buffer_head *bh) | |
90 | { | |
91 | struct page *page = bh->b_page; | |
92 | ||
93 | lock_buffer(bh); | |
94 | clear_buffer_nilfs_volatile(bh); | |
4e13e66b | 95 | clear_buffer_nilfs_checked(bh); |
b1f6a4f2 | 96 | clear_buffer_nilfs_redirected(bh); |
84338237 RK |
97 | clear_buffer_dirty(bh); |
98 | if (nilfs_page_buffers_clean(page)) | |
0bd49f94 RK |
99 | __nilfs_clear_page_dirty(page); |
100 | ||
101 | clear_buffer_uptodate(bh); | |
102 | clear_buffer_mapped(bh); | |
103 | bh->b_blocknr = -1; | |
104 | ClearPageUptodate(page); | |
105 | ClearPageMappedToDisk(page); | |
106 | unlock_buffer(bh); | |
107 | brelse(bh); | |
108 | } | |
109 | ||
110 | /** | |
111 | * nilfs_copy_buffer -- copy buffer data and flags | |
112 | * @dbh: destination buffer | |
113 | * @sbh: source buffer | |
114 | */ | |
115 | void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) | |
116 | { | |
117 | void *kaddr0, *kaddr1; | |
118 | unsigned long bits; | |
119 | struct page *spage = sbh->b_page, *dpage = dbh->b_page; | |
120 | struct buffer_head *bh; | |
121 | ||
7b9c0976 CW |
122 | kaddr0 = kmap_atomic(spage); |
123 | kaddr1 = kmap_atomic(dpage); | |
0bd49f94 | 124 | memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); |
7b9c0976 CW |
125 | kunmap_atomic(kaddr1); |
126 | kunmap_atomic(kaddr0); | |
0bd49f94 RK |
127 | |
128 | dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; | |
129 | dbh->b_blocknr = sbh->b_blocknr; | |
130 | dbh->b_bdev = sbh->b_bdev; | |
131 | ||
132 | bh = dbh; | |
133 | bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped)); | |
134 | while ((bh = bh->b_this_page) != dbh) { | |
135 | lock_buffer(bh); | |
136 | bits &= bh->b_state; | |
137 | unlock_buffer(bh); | |
138 | } | |
139 | if (bits & (1UL << BH_Uptodate)) | |
140 | SetPageUptodate(dpage); | |
141 | else | |
142 | ClearPageUptodate(dpage); | |
143 | if (bits & (1UL << BH_Mapped)) | |
144 | SetPageMappedToDisk(dpage); | |
145 | else | |
146 | ClearPageMappedToDisk(dpage); | |
147 | } | |
148 | ||
149 | /** | |
150 | * nilfs_page_buffers_clean - check if a page has dirty buffers or not. | |
151 | * @page: page to be checked | |
152 | * | |
153 | * nilfs_page_buffers_clean() returns zero if the page has dirty buffers. | |
154 | * Otherwise, it returns non-zero value. | |
155 | */ | |
156 | int nilfs_page_buffers_clean(struct page *page) | |
157 | { | |
158 | struct buffer_head *bh, *head; | |
159 | ||
160 | bh = head = page_buffers(page); | |
161 | do { | |
162 | if (buffer_dirty(bh)) | |
163 | return 0; | |
164 | bh = bh->b_this_page; | |
165 | } while (bh != head); | |
166 | return 1; | |
167 | } | |
168 | ||
169 | void nilfs_page_bug(struct page *page) | |
170 | { | |
171 | struct address_space *m; | |
aa405b1f | 172 | unsigned long ino; |
0bd49f94 RK |
173 | |
174 | if (unlikely(!page)) { | |
175 | printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); | |
176 | return; | |
177 | } | |
178 | ||
179 | m = page->mapping; | |
aa405b1f RK |
180 | ino = m ? m->host->i_ino : 0; |
181 | ||
0bd49f94 RK |
182 | printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " |
183 | "mapping=%p ino=%lu\n", | |
184 | page, atomic_read(&page->_count), | |
185 | (unsigned long long)page->index, page->flags, m, ino); | |
186 | ||
187 | if (page_has_buffers(page)) { | |
188 | struct buffer_head *bh, *head; | |
189 | int i = 0; | |
190 | ||
191 | bh = head = page_buffers(page); | |
192 | do { | |
193 | printk(KERN_CRIT | |
194 | " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n", | |
195 | i++, bh, atomic_read(&bh->b_count), | |
196 | (unsigned long long)bh->b_blocknr, bh->b_state); | |
197 | bh = bh->b_this_page; | |
198 | } while (bh != head); | |
199 | } | |
200 | } | |
201 | ||
0bd49f94 RK |
202 | /** |
203 | * nilfs_copy_page -- copy the page with buffers | |
204 | * @dst: destination page | |
205 | * @src: source page | |
206 | * @copy_dirty: flag whether to copy dirty states on the page's buffer heads. | |
207 | * | |
7a65004b | 208 | * This function is for both data pages and btnode pages. The dirty flag |
0bd49f94 RK |
209 | * should be treated by caller. The page must not be under i/o. |
210 | * Both src and dst page must be locked | |
211 | */ | |
212 | static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) | |
213 | { | |
214 | struct buffer_head *dbh, *dbufs, *sbh, *sbufs; | |
215 | unsigned long mask = NILFS_BUFFER_INHERENT_BITS; | |
216 | ||
217 | BUG_ON(PageWriteback(dst)); | |
218 | ||
219 | sbh = sbufs = page_buffers(src); | |
220 | if (!page_has_buffers(dst)) | |
221 | create_empty_buffers(dst, sbh->b_size, 0); | |
222 | ||
223 | if (copy_dirty) | |
224 | mask |= (1UL << BH_Dirty); | |
225 | ||
226 | dbh = dbufs = page_buffers(dst); | |
227 | do { | |
228 | lock_buffer(sbh); | |
229 | lock_buffer(dbh); | |
230 | dbh->b_state = sbh->b_state & mask; | |
231 | dbh->b_blocknr = sbh->b_blocknr; | |
232 | dbh->b_bdev = sbh->b_bdev; | |
233 | sbh = sbh->b_this_page; | |
234 | dbh = dbh->b_this_page; | |
235 | } while (dbh != dbufs); | |
236 | ||
237 | copy_highpage(dst, src); | |
238 | ||
239 | if (PageUptodate(src) && !PageUptodate(dst)) | |
240 | SetPageUptodate(dst); | |
241 | else if (!PageUptodate(src) && PageUptodate(dst)) | |
242 | ClearPageUptodate(dst); | |
243 | if (PageMappedToDisk(src) && !PageMappedToDisk(dst)) | |
244 | SetPageMappedToDisk(dst); | |
245 | else if (!PageMappedToDisk(src) && PageMappedToDisk(dst)) | |
246 | ClearPageMappedToDisk(dst); | |
247 | ||
248 | do { | |
249 | unlock_buffer(sbh); | |
250 | unlock_buffer(dbh); | |
251 | sbh = sbh->b_this_page; | |
252 | dbh = dbh->b_this_page; | |
253 | } while (dbh != dbufs); | |
254 | } | |
255 | ||
256 | int nilfs_copy_dirty_pages(struct address_space *dmap, | |
257 | struct address_space *smap) | |
258 | { | |
259 | struct pagevec pvec; | |
260 | unsigned int i; | |
261 | pgoff_t index = 0; | |
262 | int err = 0; | |
263 | ||
264 | pagevec_init(&pvec, 0); | |
265 | repeat: | |
266 | if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY, | |
267 | PAGEVEC_SIZE)) | |
268 | return 0; | |
269 | ||
270 | for (i = 0; i < pagevec_count(&pvec); i++) { | |
271 | struct page *page = pvec.pages[i], *dpage; | |
272 | ||
273 | lock_page(page); | |
274 | if (unlikely(!PageDirty(page))) | |
275 | NILFS_PAGE_BUG(page, "inconsistent dirty state"); | |
276 | ||
277 | dpage = grab_cache_page(dmap, page->index); | |
278 | if (unlikely(!dpage)) { | |
279 | /* No empty page is added to the page cache */ | |
280 | err = -ENOMEM; | |
281 | unlock_page(page); | |
282 | break; | |
283 | } | |
284 | if (unlikely(!page_has_buffers(page))) | |
285 | NILFS_PAGE_BUG(page, | |
286 | "found empty page in dat page cache"); | |
287 | ||
288 | nilfs_copy_page(dpage, page, 1); | |
289 | __set_page_dirty_nobuffers(dpage); | |
290 | ||
291 | unlock_page(dpage); | |
292 | page_cache_release(dpage); | |
293 | unlock_page(page); | |
294 | } | |
295 | pagevec_release(&pvec); | |
296 | cond_resched(); | |
297 | ||
298 | if (likely(!err)) | |
299 | goto repeat; | |
300 | return err; | |
301 | } | |
302 | ||
303 | /** | |
7a65004b | 304 | * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache |
0bd49f94 RK |
305 | * @dmap: destination page cache |
306 | * @smap: source page cache | |
307 | * | |
308 | * No pages must no be added to the cache during this process. | |
309 | * This must be ensured by the caller. | |
310 | */ | |
311 | void nilfs_copy_back_pages(struct address_space *dmap, | |
312 | struct address_space *smap) | |
313 | { | |
314 | struct pagevec pvec; | |
315 | unsigned int i, n; | |
316 | pgoff_t index = 0; | |
317 | int err; | |
318 | ||
319 | pagevec_init(&pvec, 0); | |
320 | repeat: | |
321 | n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE); | |
322 | if (!n) | |
323 | return; | |
324 | index = pvec.pages[n - 1]->index + 1; | |
325 | ||
326 | for (i = 0; i < pagevec_count(&pvec); i++) { | |
327 | struct page *page = pvec.pages[i], *dpage; | |
328 | pgoff_t offset = page->index; | |
329 | ||
330 | lock_page(page); | |
331 | dpage = find_lock_page(dmap, offset); | |
332 | if (dpage) { | |
333 | /* override existing page on the destination cache */ | |
1f5abe7e | 334 | WARN_ON(PageDirty(dpage)); |
0bd49f94 RK |
335 | nilfs_copy_page(dpage, page, 0); |
336 | unlock_page(dpage); | |
337 | page_cache_release(dpage); | |
338 | } else { | |
339 | struct page *page2; | |
340 | ||
341 | /* move the page to the destination cache */ | |
342 | spin_lock_irq(&smap->tree_lock); | |
343 | page2 = radix_tree_delete(&smap->page_tree, offset); | |
1f5abe7e RK |
344 | WARN_ON(page2 != page); |
345 | ||
0bd49f94 RK |
346 | smap->nrpages--; |
347 | spin_unlock_irq(&smap->tree_lock); | |
348 | ||
349 | spin_lock_irq(&dmap->tree_lock); | |
350 | err = radix_tree_insert(&dmap->page_tree, offset, page); | |
351 | if (unlikely(err < 0)) { | |
1f5abe7e | 352 | WARN_ON(err == -EEXIST); |
0bd49f94 RK |
353 | page->mapping = NULL; |
354 | page_cache_release(page); /* for cache */ | |
355 | } else { | |
356 | page->mapping = dmap; | |
357 | dmap->nrpages++; | |
358 | if (PageDirty(page)) | |
359 | radix_tree_tag_set(&dmap->page_tree, | |
360 | offset, | |
361 | PAGECACHE_TAG_DIRTY); | |
362 | } | |
363 | spin_unlock_irq(&dmap->tree_lock); | |
364 | } | |
365 | unlock_page(page); | |
366 | } | |
367 | pagevec_release(&pvec); | |
368 | cond_resched(); | |
369 | ||
370 | goto repeat; | |
371 | } | |
372 | ||
373 | void nilfs_clear_dirty_pages(struct address_space *mapping) | |
374 | { | |
375 | struct pagevec pvec; | |
376 | unsigned int i; | |
377 | pgoff_t index = 0; | |
378 | ||
379 | pagevec_init(&pvec, 0); | |
380 | ||
381 | while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, | |
382 | PAGEVEC_SIZE)) { | |
383 | for (i = 0; i < pagevec_count(&pvec); i++) { | |
384 | struct page *page = pvec.pages[i]; | |
385 | struct buffer_head *bh, *head; | |
386 | ||
387 | lock_page(page); | |
388 | ClearPageUptodate(page); | |
389 | ClearPageMappedToDisk(page); | |
390 | bh = head = page_buffers(page); | |
391 | do { | |
392 | lock_buffer(bh); | |
393 | clear_buffer_dirty(bh); | |
394 | clear_buffer_nilfs_volatile(bh); | |
4e13e66b | 395 | clear_buffer_nilfs_checked(bh); |
b1f6a4f2 | 396 | clear_buffer_nilfs_redirected(bh); |
0bd49f94 RK |
397 | clear_buffer_uptodate(bh); |
398 | clear_buffer_mapped(bh); | |
399 | unlock_buffer(bh); | |
400 | bh = bh->b_this_page; | |
401 | } while (bh != head); | |
402 | ||
403 | __nilfs_clear_page_dirty(page); | |
404 | unlock_page(page); | |
405 | } | |
406 | pagevec_release(&pvec); | |
407 | cond_resched(); | |
408 | } | |
409 | } | |
410 | ||
411 | unsigned nilfs_page_count_clean_buffers(struct page *page, | |
412 | unsigned from, unsigned to) | |
413 | { | |
414 | unsigned block_start, block_end; | |
415 | struct buffer_head *bh, *head; | |
416 | unsigned nc = 0; | |
417 | ||
418 | for (bh = head = page_buffers(page), block_start = 0; | |
419 | bh != head || !block_start; | |
420 | block_start = block_end, bh = bh->b_this_page) { | |
421 | block_end = block_start + bh->b_size; | |
422 | if (block_end > from && block_start < to && !buffer_dirty(bh)) | |
423 | nc++; | |
424 | } | |
425 | return nc; | |
426 | } | |
ae53a0a2 | 427 | |
aa405b1f | 428 | void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, |
7eaceacc | 429 | struct backing_dev_info *bdi) |
ebdfed4d | 430 | { |
aa405b1f | 431 | mapping->host = inode; |
ebdfed4d RK |
432 | mapping->flags = 0; |
433 | mapping_set_gfp_mask(mapping, GFP_NOFS); | |
434 | mapping->assoc_mapping = NULL; | |
435 | mapping->backing_dev_info = bdi; | |
d611b22f | 436 | mapping->a_ops = &empty_aops; |
ebdfed4d | 437 | } |
0bd49f94 RK |
438 | |
439 | /* | |
440 | * NILFS2 needs clear_page_dirty() in the following two cases: | |
441 | * | |
442 | * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears | |
443 | * page dirty flags when it copies back pages from the shadow cache | |
444 | * (gcdat->{i_mapping,i_btnode_cache}) to its original cache | |
445 | * (dat->{i_mapping,i_btnode_cache}). | |
446 | * | |
447 | * 2) Some B-tree operations like insertion or deletion may dispose buffers | |
448 | * in dirty state, and this needs to cancel the dirty state of their pages. | |
449 | */ | |
450 | int __nilfs_clear_page_dirty(struct page *page) | |
451 | { | |
452 | struct address_space *mapping = page->mapping; | |
453 | ||
454 | if (mapping) { | |
455 | spin_lock_irq(&mapping->tree_lock); | |
456 | if (test_bit(PG_dirty, &page->flags)) { | |
457 | radix_tree_tag_clear(&mapping->page_tree, | |
458 | page_index(page), | |
459 | PAGECACHE_TAG_DIRTY); | |
460 | spin_unlock_irq(&mapping->tree_lock); | |
461 | return clear_page_dirty_for_io(page); | |
462 | } | |
463 | spin_unlock_irq(&mapping->tree_lock); | |
464 | return 0; | |
465 | } | |
466 | return TestClearPageDirty(page); | |
467 | } | |
622daaff RK |
468 | |
469 | /** | |
470 | * nilfs_find_uncommitted_extent - find extent of uncommitted data | |
471 | * @inode: inode | |
472 | * @start_blk: start block offset (in) | |
473 | * @blkoff: start offset of the found extent (out) | |
474 | * | |
475 | * This function searches an extent of buffers marked "delayed" which | |
476 | * starts from a block offset equal to or larger than @start_blk. If | |
477 | * such an extent was found, this will store the start offset in | |
478 | * @blkoff and return its length in blocks. Otherwise, zero is | |
479 | * returned. | |
480 | */ | |
481 | unsigned long nilfs_find_uncommitted_extent(struct inode *inode, | |
482 | sector_t start_blk, | |
483 | sector_t *blkoff) | |
484 | { | |
485 | unsigned int i; | |
486 | pgoff_t index; | |
487 | unsigned int nblocks_in_page; | |
488 | unsigned long length = 0; | |
489 | sector_t b; | |
490 | struct pagevec pvec; | |
491 | struct page *page; | |
492 | ||
493 | if (inode->i_mapping->nrpages == 0) | |
494 | return 0; | |
495 | ||
496 | index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | |
497 | nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits); | |
498 | ||
499 | pagevec_init(&pvec, 0); | |
500 | ||
501 | repeat: | |
502 | pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE, | |
503 | pvec.pages); | |
504 | if (pvec.nr == 0) | |
505 | return length; | |
506 | ||
507 | if (length > 0 && pvec.pages[0]->index > index) | |
508 | goto out; | |
509 | ||
510 | b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | |
511 | i = 0; | |
512 | do { | |
513 | page = pvec.pages[i]; | |
514 | ||
515 | lock_page(page); | |
516 | if (page_has_buffers(page)) { | |
517 | struct buffer_head *bh, *head; | |
518 | ||
519 | bh = head = page_buffers(page); | |
520 | do { | |
521 | if (b < start_blk) | |
522 | continue; | |
523 | if (buffer_delay(bh)) { | |
524 | if (length == 0) | |
525 | *blkoff = b; | |
526 | length++; | |
527 | } else if (length > 0) { | |
528 | goto out_locked; | |
529 | } | |
530 | } while (++b, bh = bh->b_this_page, bh != head); | |
531 | } else { | |
532 | if (length > 0) | |
533 | goto out_locked; | |
534 | ||
535 | b += nblocks_in_page; | |
536 | } | |
537 | unlock_page(page); | |
538 | ||
539 | } while (++i < pagevec_count(&pvec)); | |
540 | ||
541 | index = page->index + 1; | |
542 | pagevec_release(&pvec); | |
543 | cond_resched(); | |
544 | goto repeat; | |
545 | ||
546 | out_locked: | |
547 | unlock_page(page); | |
548 | out: | |
549 | pagevec_release(&pvec); | |
550 | return length; | |
551 | } |