2 * JFFS2 -- Journalling Flash File System, Version 2.
4 * Copyright (C) 2001-2003 Red Hat, Inc.
8 * For licensing information, see the file 'LICENCE' in this directory.
10 * $Id: gc.c,v 1.146 2005/03/20 17:45:25 dedekind Exp $
14 #include <linux/kernel.h>
15 #include <linux/mtd/mtd.h>
16 #include <linux/slab.h>
17 #include <linux/pagemap.h>
18 #include <linux/crc32.h>
19 #include <linux/compiler.h>
20 #include <linux/stat.h>
/*
 * Forward declarations for the file-local GC helpers defined further down
 * in this file.  jffs2_garbage_collect_live() takes the raw node reference
 * together with the in-core inode and calls into the others, one per kind
 * of node that has to be moved: pristine copy, inode metadata, dirent,
 * deletion dirent, hole node and data node.
 */
24 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
25 struct jffs2_inode_cache *ic,
26 struct jffs2_raw_node_ref *raw);
27 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
28 struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
29 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
30 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
31 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
32 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
33 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
34 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
35 uint32_t start, uint32_t end);
36 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
37 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
38 uint32_t start, uint32_t end);
39 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
40 struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
/*
 * jffs2_find_gc_block() - choose the eraseblock that GC works on next.
 * @c: filesystem state holding the candidate block lists.
 *
 * n = jiffies % 128 weights the choice:
 *   - bad_used_list, when it is non-empty and nr_free_blocks exceeds
 *     resv_blocks_gcbad (checked first, independent of n);
 *   - erasable_list    when n < 50;
 *   - very_dirty_list  when n < 110;
 *   - dirty_list       when n < 126;
 *   - clean_list       otherwise.
 * A weighted pick only counts if that list is non-empty; failing that, the
 * clean, dirty, very_dirty and erasable lists are tried in that fixed
 * order.  If only erasable_pending_wbuf_list has entries, the write buffer
 * is flushed with jffs2_flush_wbuf_pad().
 *
 * The first entry of the chosen list is taken, its gc_node is pointed at
 * its first_node, and any wasted_size on the block is converted to
 * dirty_size, both on the block and in the totals kept in @c.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * The caller logs that no erase block could be found, so presumably NULL
 * is returned when every list is empty -- confirm against the full source.
 */
42 /* Called with erase_completion_lock held */
43 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
45 struct jffs2_eraseblock *ret;
46 struct list_head *nextlist = NULL;
47 int n = jiffies % 128;
49 /* Pick an eraseblock to garbage collect next. This is where we'll
50 put the clever wear-levelling algorithms. Eventually. */
51 /* We possibly want to favour the dirtier blocks more when the
52 number of free blocks is low. */
54 if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
55 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
56 nextlist = &c->bad_used_list;
57 } else if (n < 50 && !list_empty(&c->erasable_list)) {
58 /* Note that most of them will have gone directly to be erased.
59 So don't favour the erasable_list _too_ much. */
60 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
61 nextlist = &c->erasable_list;
62 } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
63 /* Most of the time, pick one off the very_dirty list */
64 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
65 nextlist = &c->very_dirty_list;
66 } else if (n < 126 && !list_empty(&c->dirty_list)) {
67 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
68 nextlist = &c->dirty_list;
69 } else if (!list_empty(&c->clean_list)) {
70 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
71 nextlist = &c->clean_list;
72 } else if (!list_empty(&c->dirty_list)) {
73 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
75 nextlist = &c->dirty_list;
76 } else if (!list_empty(&c->very_dirty_list)) {
77 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
78 nextlist = &c->very_dirty_list;
79 } else if (!list_empty(&c->erasable_list)) {
80 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
82 nextlist = &c->erasable_list;
83 } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
84 /* There are blocks waiting for the wbuf sync */
85 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
86 jffs2_flush_wbuf_pad(c);
89 /* Eep. All were empty */
90 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
/* Take the block at the head of whichever list was selected. */
94 ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
/* GC of this block starts from its first node reference. */
97 ret->gc_node = ret->first_node;
99 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
103 /* Have we accidentally picked a clean block with wasted space ? */
104 if (ret->wasted_size) {
105 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
106 ret->dirty_size += ret->wasted_size;
107 c->wasted_size -= ret->wasted_size;
108 c->dirty_size += ret->wasted_size;
109 ret->wasted_size = 0;
112 D2(jffs2_dump_block_lists(c));
/*
 * jffs2_garbage_collect_pass() - make one attempt to move GC forward.
 * @c: filesystem state.
 *
 * As far as this listing shows, a pass goes through these stages:
 *
 *  1. alloc_sem is taken with down_interruptible().
 *  2. Under erase_completion_lock, c->unchecked_size is tested.  While
 *     unchecked space remains, the inode cache for c->checked_ino++ is
 *     looked up under inocache_lock and handled according to ic->state:
 *     inodes with nlink zero or already checked are skipped, an inode in
 *     INO_STATE_READING is waited for on inocache_wq, and an
 *     INO_STATE_UNCHECKED inode is moved to INO_STATE_CHECKING, checked
 *     with jffs2_do_crccheck_inode() and then set to
 *     INO_STATE_CHECKEDABSENT.
 *  3. jffs2_find_gc_block() supplies a block to collect from, and obsolete
 *     node refs are skipped by following raw->next_phys.
 *  4. A ref with no next_in_ino belongs to no inode and is simply marked
 *     obsolete.
 *  5. Otherwise the owning inode cache comes from jffs2_raw_ref_to_ic(),
 *     and erase_completion_lock is traded for inocache_lock.  A
 *     REF_PRISTINE node of an INO_STATE_CHECKEDABSENT inode is copied
 *     intact by jffs2_garbage_collect_pristine() while the inode is parked
 *     in INO_STATE_GC; afterwards the state goes back to
 *     INO_STATE_CHECKEDABSENT and inocache_wq is woken.  A result of
 *     -EBADFD from that copy falls through to the slow path, which is
 *     jffs2_gc_fetch_inode(), jffs2_garbage_collect_live() and
 *     jffs2_gc_release_inode().
 *  6. Finally, under erase_completion_lock, a c->gcblock whose used_size
 *     is zero is queued on erase_pending_list, nr_erasing_blocks is
 *     incremented and jffs2_erase_pending_trigger() is called.
 *
 * NOTE(review): return statements, gotos and the loop and switch headers
 * are among the lines missing from this listing, so the exact return value
 * of each path should be confirmed against the full source.
 */
116 /* jffs2_garbage_collect_pass
117 * Make a single attempt to progress GC. Move one node, and possibly
118 * start erasing one eraseblock.
120 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
122 struct jffs2_inode_info *f;
123 struct jffs2_inode_cache *ic;
124 struct jffs2_eraseblock *jeb;
125 struct jffs2_raw_node_ref *raw;
126 int ret = 0, inum, nlink;
128 if (down_interruptible(&c->alloc_sem))
132 spin_lock(&c->erase_completion_lock);
133 if (!c->unchecked_size)
136 /* We can't start doing GC yet. We haven't finished checking
137 the node CRCs etc. Do it now. */
139 /* checked_ino is protected by the alloc_sem */
140 if (c->checked_ino > c->highest_ino) {
141 printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
143 D2(jffs2_dump_block_lists(c));
144 spin_unlock(&c->erase_completion_lock);
148 spin_unlock(&c->erase_completion_lock);
150 spin_lock(&c->inocache_lock);
152 ic = jffs2_get_ino_cache(c, c->checked_ino++);
155 spin_unlock(&c->inocache_lock);
160 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
162 spin_unlock(&c->inocache_lock);
166 case INO_STATE_CHECKEDABSENT:
167 case INO_STATE_PRESENT:
168 D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
169 spin_unlock(&c->inocache_lock);
173 case INO_STATE_CHECKING:
174 printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
175 spin_unlock(&c->inocache_lock);
178 case INO_STATE_READING:
179 /* We need to wait for it to finish, lest we move on
180 and trigger the BUG() above while we haven't yet
181 finished checking all its nodes */
182 D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
184 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
190 case INO_STATE_UNCHECKED:
193 ic->state = INO_STATE_CHECKING;
194 spin_unlock(&c->inocache_lock);
196 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
198 ret = jffs2_do_crccheck_inode(c, ic);
200 printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
202 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
207 /* First, work out which block we're garbage-collecting */
211 jeb = jffs2_find_gc_block(c);
214 D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
215 spin_unlock(&c->erase_completion_lock);
220 D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
222 printk(KERN_DEBUG "Nextblock at %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
224 if (!jeb->used_size) {
231 while(ref_obsolete(raw)) {
232 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
233 raw = raw->next_phys;
234 if (unlikely(!raw)) {
235 printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
236 printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
237 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
239 spin_unlock(&c->erase_completion_lock);
246 D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
248 if (!raw->next_in_ino) {
249 /* Inode-less node. Clean marker, snapshot or something like that */
250 /* FIXME: If it's something that needs to be copied, including something
251 we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */
252 spin_unlock(&c->erase_completion_lock);
253 jffs2_mark_node_obsolete(c, raw);
258 ic = jffs2_raw_ref_to_ic(raw);
260 /* We need to hold the inocache. Either the erase_completion_lock or
261 the inocache_lock are sufficient; we trade down since the inocache_lock
262 causes less contention. */
263 spin_lock(&c->inocache_lock);
265 spin_unlock(&c->erase_completion_lock);
267 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
269 /* Three possibilities:
270 1. Inode is already in-core. We must iget it and do proper
271 updating to its fragtree, etc.
272 2. Inode is not in-core, node is REF_PRISTINE. We lock the
273 inocache to prevent a read_inode(), copy the node intact.
274 3. Inode is not in-core, node is not pristine. We must iget()
275 and take the slow path.
279 case INO_STATE_CHECKEDABSENT:
280 /* It's been checked, but it's not currently in-core.
281 We can just copy any pristine nodes, but have
282 to prevent anyone else from doing read_inode() while
283 we're at it, so we set the state accordingly */
284 if (ref_flags(raw) == REF_PRISTINE)
285 ic->state = INO_STATE_GC;
287 D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
292 case INO_STATE_PRESENT:
293 /* It's in-core. GC must iget() it. */
296 case INO_STATE_UNCHECKED:
297 case INO_STATE_CHECKING:
299 /* Should never happen. We should have finished checking
300 by the time we actually start doing any GC, and since
301 we're holding the alloc_sem, no other garbage collection
304 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
307 spin_unlock(&c->inocache_lock);
310 case INO_STATE_READING:
311 /* Someone's currently trying to read it. We must wait for
312 them to finish and then go through the full iget() route
313 to do the GC. However, sometimes read_inode() needs to get
314 the alloc_sem() (for marking nodes invalid) so we must
315 drop the alloc_sem before sleeping. */
318 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
319 ic->ino, ic->state));
320 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
321 /* And because we dropped the alloc_sem we must start again from the
322 beginning. Ponder chance of livelock here -- we're returning success
323 without actually making any progress.
325 Q: What are the chances that the inode is back in INO_STATE_READING
326 again by the time we next enter this function? And that this happens
327 enough times to cause a real delay?
329 A: Small enough that I don't care :)
334 /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
335 node intact, and we don't have to muck about with the fragtree etc.
336 because we know it's not in-core. If it _was_ in-core, we go through
337 all the iget() crap anyway */
339 if (ic->state == INO_STATE_GC) {
340 spin_unlock(&c->inocache_lock);
342 ret = jffs2_garbage_collect_pristine(c, ic, raw);
344 spin_lock(&c->inocache_lock);
345 ic->state = INO_STATE_CHECKEDABSENT;
346 wake_up(&c->inocache_wq);
348 if (ret != -EBADFD) {
349 spin_unlock(&c->inocache_lock);
353 /* Fall through if it wanted us to, with inocache_lock held */
356 /* Prevent the fairly unlikely race where the gcblock is
357 entirely obsoleted by the final close of a file which had
358 the only valid nodes in the block, followed by erasure,
359 followed by freeing of the ic because the erased block(s)
360 held _all_ the nodes of that inode.... never been seen but
361 it's vaguely possible. */
365 spin_unlock(&c->inocache_lock);
367 f = jffs2_gc_fetch_inode(c, inum, nlink);
377 ret = jffs2_garbage_collect_live(c, jeb, raw, f);
379 jffs2_gc_release_inode(c, f);
385 /* If we've finished this block, start it erasing */
386 spin_lock(&c->erase_completion_lock);
389 if (c->gcblock && !c->gcblock->used_size) {
390 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
391 /* We're GC'ing an empty block? */
392 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
394 c->nr_erasing_blocks++;
395 jffs2_erase_pending_trigger(c);
397 spin_unlock(&c->erase_completion_lock);
/*
 * jffs2_garbage_collect_live() - GC one node of an inode that is in-core.
 * @c:   filesystem state.
 * @jeb: eraseblock the caller believes is being collected.
 * @raw: node reference to move.
 * @f:   in-core inode owning @raw.
 *
 * Under erase_completion_lock it first re-checks that @jeb is still
 * c->gcblock and that @raw has not become obsolete.  It then works out what
 * kind of node @raw is and hands off to the matching helper:
 *   - the ref of f->metadata          -> jffs2_garbage_collect_metadata();
 *   - a ref used by fragtree entries  -> the covered range is accumulated
 *     (end = frag->ofs + frag->size, stopping once nrfrags equals the frag
 *     count of the node).  A REF_PRISTINE ref is first offered to
 *     jffs2_garbage_collect_pristine(), after which frag->node->raw is
 *     reloaded from f->inocache->nodes.  Otherwise a range whose start and
 *     end - 1 lie in different pages goes to jffs2_garbage_collect_hole(),
 *     and any other range to jffs2_garbage_collect_dnode();
 *   - an entry on f->dents            -> jffs2_garbage_collect_dirent() or
 *     jffs2_garbage_collect_deletion_dirent().
 * A ref found in none of these places is reported with a warning.
 *
 * NOTE(review): the conditions that pick between the two dirent helpers,
 * the assignments to fn and start, and the return statements are in lines
 * missing from this listing -- confirm against the full source.
 */
402 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
403 struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
405 struct jffs2_node_frag *frag;
406 struct jffs2_full_dnode *fn = NULL;
407 struct jffs2_full_dirent *fd;
408 uint32_t start = 0, end = 0, nrfrags = 0;
413 /* Now we have the lock for this inode. Check that it's still the one at the head
416 spin_lock(&c->erase_completion_lock);
418 if (c->gcblock != jeb) {
419 spin_unlock(&c->erase_completion_lock);
420 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
423 if (ref_obsolete(raw)) {
424 spin_unlock(&c->erase_completion_lock);
425 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
426 /* They'll call again */
429 spin_unlock(&c->erase_completion_lock);
431 /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
432 if (f->metadata && f->metadata->raw == raw) {
434 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
438 /* FIXME. Read node and do lookup? */
439 for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
440 if (frag->node && frag->node->raw == raw) {
442 end = frag->ofs + frag->size;
445 if (nrfrags == frag->node->frags)
446 break; /* We've found them all */
450 if (ref_flags(raw) == REF_PRISTINE) {
451 ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
453 /* Urgh. Return it sensibly. */
454 frag->node->raw = f->inocache->nodes;
459 /* We found a datanode. Do the GC */
460 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
461 /* It crosses a page boundary. Therefore, it must be a hole. */
462 ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
464 /* It could still be a hole. But we GC the page this way anyway */
465 ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
470 /* Wasn't a dnode. Try dirent */
471 for (fd = f->dents; fd; fd=fd->next) {
477 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
479 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
481 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
482 ref_offset(raw), f->inocache->ino);
483 if (ref_obsolete(raw)) {
484 printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
/*
 * jffs2_garbage_collect_pristine() - copy a REF_PRISTINE node unchanged.
 * @c:   filesystem state.
 * @ic:  inode cache that owns the node.
 * @raw: reference to the node being moved out of c->gcblock.
 *
 * Steps visible in this listing:
 *   - rawlen is the total on-flash length of the node, from ref_totlen().
 *   - Space is reserved with jffs2_reserve_space_gc(), asking for the
 *     smaller of rawlen and sizeof(struct jffs2_raw_inode) +
 *     JFFS2_MIN_DATA_LEN.  If the granted alloclen is below rawlen the
 *     node does not fit untouched and this fast path is abandoned.
 *   - The node is read into a kmalloc buffer and verified: header CRC
 *     first, then node CRC plus data CRC (inode nodes, data CRC only when
 *     dsize is non-zero) or node CRC plus name CRC (dirent nodes).  Any
 *     other node type is reported as unknown.
 *   - A new raw node ref is allocated and the buffer is written to the
 *     reserved offset.  After a failed or short write the bad ref is
 *     flagged REF_OBSOLETE and accounted for, or freed when the driver
 *     reported retlen zero as the log message states, and one retry is
 *     attempted (guarded by retried) with a fresh reservation of rawlen.
 *   - On success the new ref is flagged REF_PRISTINE, added with
 *     jffs2_add_physical_node_ref(), linked at the head of ic->nodes under
 *     erase_completion_lock, and the old ref is marked obsolete.
 *
 * NOTE(review): return statements and gotos are missing from this listing.
 * The caller in jffs2_garbage_collect_pass() treats -EBADFD as a request
 * to fall back to the slow path; which branches here produce it should be
 * confirmed against the full source.
 */
495 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
496 struct jffs2_inode_cache *ic,
497 struct jffs2_raw_node_ref *raw)
499 union jffs2_node_union *node;
500 struct jffs2_raw_node_ref *nraw;
503 uint32_t phys_ofs, alloclen;
504 uint32_t crc, rawlen;
507 D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
509 rawlen = ref_totlen(c, c->gcblock, raw);
511 /* Ask for a small amount of space (or the totlen if smaller) because we
512 don't want to force wastage of the end of a block if splitting would
514 ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN,
515 rawlen), &phys_ofs, &alloclen);
519 if (alloclen < rawlen) {
520 /* Doesn't fit untouched. We'll go the old route and split it */
524 node = kmalloc(rawlen, GFP_KERNEL);
528 ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
529 if (!ret && retlen != rawlen)
/* The header CRC is computed over the common node header less its last 4 bytes. */
534 crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
535 if (je32_to_cpu(node->u.hdr_crc) != crc) {
536 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
537 ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
541 switch(je16_to_cpu(node->u.nodetype)) {
542 case JFFS2_NODETYPE_INODE:
543 crc = crc32(0, node, sizeof(node->i)-8);
544 if (je32_to_cpu(node->i.node_crc) != crc) {
545 printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
546 ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
550 if (je32_to_cpu(node->i.dsize)) {
551 crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
552 if (je32_to_cpu(node->i.data_crc) != crc) {
553 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
554 ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
560 case JFFS2_NODETYPE_DIRENT:
561 crc = crc32(0, node, sizeof(node->d)-8);
562 if (je32_to_cpu(node->d.node_crc) != crc) {
563 printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
564 ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
569 crc = crc32(0, node->d.name, node->d.nsize);
570 if (je32_to_cpu(node->d.name_crc) != crc) {
571 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
572 ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
578 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
579 ref_offset(raw), je16_to_cpu(node->u.nodetype));
583 nraw = jffs2_alloc_raw_node_ref();
589 /* OK, all the CRCs are good; this node can just be copied as-is. */
591 nraw->flash_offset = phys_ofs;
592 nraw->__totlen = rawlen;
593 nraw->next_phys = NULL;
595 ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
597 if (ret || (retlen != rawlen)) {
598 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
599 rawlen, phys_ofs, ret, retlen);
601 /* Doesn't belong to any inode */
602 nraw->next_in_ino = NULL;
604 nraw->flash_offset |= REF_OBSOLETE;
605 jffs2_add_physical_node_ref(c, nraw);
606 jffs2_mark_node_obsolete(c, nraw);
608 printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset);
609 jffs2_free_raw_node_ref(nraw);
611 if (!retried && (nraw = jffs2_alloc_raw_node_ref())) {
612 /* Try to reallocate space and retry */
614 struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
618 D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
620 ACCT_SANITY_CHECK(c,jeb);
621 D1(ACCT_PARANOIA_CHECK(jeb));
623 ret = jffs2_reserve_space_gc(c, rawlen, &phys_ofs, &dummy);
626 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
628 ACCT_SANITY_CHECK(c,jeb);
629 D1(ACCT_PARANOIA_CHECK(jeb));
633 D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
634 jffs2_free_raw_node_ref(nraw);
637 jffs2_free_raw_node_ref(nraw);
642 nraw->flash_offset |= REF_PRISTINE;
643 jffs2_add_physical_node_ref(c, nraw);
645 /* Link into per-inode list. This is safe because of the ic
646 state being INO_STATE_GC. Note that if we're doing this
647 for an inode which is in-core, the 'nraw' pointer is then
648 going to be fetched from ic->nodes by our caller. */
649 spin_lock(&c->erase_completion_lock);
650 nraw->next_in_ino = ic->nodes;
652 spin_unlock(&c->erase_completion_lock);
654 jffs2_mark_node_obsolete(c, raw);
655 D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
/*
 * jffs2_garbage_collect_metadata() - rewrite the metadata node of an inode.
 * @c:   filesystem state.
 * @jeb: eraseblock being collected (not referenced in the visible lines).
 * @f:   in-core inode whose metadata node is being moved.
 * @fn:  the existing metadata dnode.
 *
 * The payload depends on the inode type:
 *   - block and character devices: a 16-bit value built as
 *     (major << 8) | minor, produced without reading the old node;
 *   - symlinks: a buffer of fn->size bytes is allocated and filled by
 *     jffs2_read_dnode().
 * A fresh jffs2_raw_inode is then built (version ++f->highest_version,
 * offset 0, csize and dsize both mdatalen, JFFS2_COMPR_NONE, header, node
 * and data CRCs) and written with jffs2_write_dnode() using ALLOC_GC.  On
 * success the old node is marked obsolete and freed and f->metadata is
 * pointed at the new dnode.
 *
 * NOTE(review): mdatalen is declared as a plain char in the declaration
 * that it shares with mdata, yet it is used as a byte count (passed to
 * jffs2_read_dnode() and added to sizeof(ri)).  A length above CHAR_MAX
 * would not be representable -- confirm whether an int was intended.
 *
 * NOTE(review): error-path returns and the cleanup after the final
 * S_ISLNK() test are in lines missing from this listing.
 */
665 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
666 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
668 struct jffs2_full_dnode *new_fn;
669 struct jffs2_raw_inode ri;
671 char *mdata = NULL, mdatalen = 0;
672 uint32_t alloclen, phys_ofs;
675 if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
676 S_ISCHR(JFFS2_F_I_MODE(f)) ) {
677 /* For these, we don't actually need to read the old node */
678 /* FIXME: for minor or major > 255. */
679 dev = cpu_to_je16(((JFFS2_F_I_RDEV_MAJ(f) << 8) |
680 JFFS2_F_I_RDEV_MIN(f)));
681 mdata = (char *)&dev;
682 mdatalen = sizeof(dev);
683 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
684 } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
686 mdata = kmalloc(fn->size, GFP_KERNEL);
688 printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
691 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
693 printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
697 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
/* Reserve room for the node header plus the payload gathered above. */
701 ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen);
703 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
704 sizeof(ri)+ mdatalen, ret);
/* Build the replacement node from scratch; the CRC fields are filled in last. */
708 memset(&ri, 0, sizeof(ri));
709 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
710 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
711 ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
712 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
714 ri.ino = cpu_to_je32(f->inocache->ino);
715 ri.version = cpu_to_je32(++f->highest_version);
716 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
717 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
718 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
719 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
720 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
721 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
722 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
723 ri.offset = cpu_to_je32(0);
724 ri.csize = cpu_to_je32(mdatalen);
725 ri.dsize = cpu_to_je32(mdatalen);
726 ri.compr = JFFS2_COMPR_NONE;
727 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
728 ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
730 new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, ALLOC_GC);
732 if (IS_ERR(new_fn)) {
733 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
734 ret = PTR_ERR(new_fn);
/* The new copy is on flash: retire the old node and switch the inode over. */
737 jffs2_mark_node_obsolete(c, fn->raw);
738 jffs2_free_full_dnode(fn);
739 f->metadata = new_fn;
741 if (S_ISLNK(JFFS2_F_I_MODE(f)))
/*
 * jffs2_garbage_collect_dirent() - write a fresh copy of a directory entry.
 * @c:   filesystem state.
 * @jeb: eraseblock being collected (not referenced in the visible lines).
 * @f:   in-core directory inode that owns the entry.
 * @fd:  the dirent to rewrite.
 *
 * Builds a jffs2_raw_dirent for @fd: nsize is strlen(fd->name), pino is the
 * inode number of @f, the version is ++f->highest_version, ino is taken
 * from @fd and mctime is the later of the mtime and ctime of @f.  Header,
 * node and name CRCs are computed, space is reserved with
 * jffs2_reserve_space_gc() and the entry is written with
 * jffs2_write_dirent() using ALLOC_GC.
 *
 * Returns PTR_ERR() of the write result if jffs2_write_dirent() fails.
 * Otherwise the new dirent is added to f->dents with
 * jffs2_add_fd_to_list().
 *
 * NOTE(review): the remaining return statements are in lines missing from
 * this listing.
 */
746 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
747 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
749 struct jffs2_full_dirent *new_fd;
750 struct jffs2_raw_dirent rd;
751 uint32_t alloclen, phys_ofs;
754 rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
755 rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
756 rd.nsize = strlen(fd->name);
757 rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
758 rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
760 rd.pino = cpu_to_je32(f->inocache->ino);
761 rd.version = cpu_to_je32(++f->highest_version);
762 rd.ino = cpu_to_je32(fd->ino);
763 rd.mctime = cpu_to_je32(max(JFFS2_F_I_MTIME(f), JFFS2_F_I_CTIME(f)));
765 rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
766 rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
768 ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen);
770 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
771 sizeof(rd)+rd.nsize, ret);
774 new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, ALLOC_GC);
776 if (IS_ERR(new_fd)) {
777 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
778 return PTR_ERR(new_fd);
780 jffs2_add_fd_to_list(c, new_fd, &f->dents);
/*
 * jffs2_garbage_collect_deletion_dirent() - GC a dirent that records a
 * deletion.
 * @c:   filesystem state.
 * @jeb: eraseblock being collected.
 * @f:   in-core directory inode that owns the entry.
 * @fd:  the deletion dirent.
 *
 * When jffs2_can_mark_obsolete() is false, the deletion dirent may still be
 * needed to cancel an older real dirent that remains on flash.  With
 * erase_free_sem held, every node ref of the inode is examined and a
 * candidate must pass all of these filters:
 *   - the ref is obsolete;
 *   - its total length equals that of the deletion dirent;
 *   - it lies in a different erase sector from fd->raw;
 *   - it reads back completely and is a JFFS2_NODETYPE_DIRENT;
 *   - name CRC, name length and the name bytes match, and its ino is
 *     non-zero (so it is not itself a deletion dirent).
 * On the first match the semaphore is released and the deletion dirent is
 * rewritten through jffs2_garbage_collect_dirent().
 *
 * If no candidate matches, or the medium can mark nodes obsolete, the
 * deletion dirent is no longer needed: it is marked obsolete and freed.
 *
 * NOTE(review): the continue statements after each filter, the kfree of
 * the scratch buffer and the list-unlink loop are in lines missing from
 * this listing -- confirm against the full source.
 */
784 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
785 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
787 struct jffs2_full_dirent **fdp = &f->dents;
790 /* On a medium where we can't actually mark nodes obsolete
791 permanently, such as NAND flash, we need to work out
792 whether this deletion dirent is still needed to actively
793 delete a 'real' dirent with the same name that's still
794 somewhere else on the flash. */
795 if (!jffs2_can_mark_obsolete(c)) {
796 struct jffs2_raw_dirent *rd;
797 struct jffs2_raw_node_ref *raw;
800 int name_len = strlen(fd->name);
801 uint32_t name_crc = crc32(0, fd->name, name_len);
802 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
804 rd = kmalloc(rawlen, GFP_KERNEL);
808 /* Prevent the erase code from nicking the obsolete node refs while
809 we're looking at them. I really don't like this extra lock but
810 can't see any alternative. Suggestions on a postcard to... */
811 down(&c->erase_free_sem);
/* The per-inode ref list is terminated by a pointer back to the inocache itself. */
813 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
815 /* We only care about obsolete ones */
816 if (!(ref_obsolete(raw)))
819 /* Any dirent with the same name is going to have the same length... */
820 if (ref_totlen(c, NULL, raw) != rawlen)
823 /* Doesn't matter if there's one in the same erase block. We're going to
824 delete it too at the same time. */
825 if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
828 D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
830 /* This is an obsolete node belonging to the same directory, and it's of the right
831 length. We need to take a closer look...*/
832 ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
834 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
835 /* If we can't read it, we don't need to continue to obsolete it. Continue */
838 if (retlen != rawlen) {
839 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
840 retlen, rawlen, ref_offset(raw));
844 if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
847 /* If the name CRC doesn't match, skip */
848 if (je32_to_cpu(rd->name_crc) != name_crc)
851 /* If the name length doesn't match, or it's another deletion dirent, skip */
852 if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
855 /* OK, check the actual name now */
856 if (memcmp(rd->name, fd->name, name_len))
859 /* OK. The name really does match. There really is still an older node on
860 the flash which our deletion dirent obsoletes. So we have to write out
861 a new deletion dirent to replace it */
862 up(&c->erase_free_sem);
864 D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
865 ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
868 return jffs2_garbage_collect_dirent(c, jeb, f, fd);
871 up(&c->erase_free_sem);
875 /* No need for it any more. Just mark it obsolete and remove it from the list */
885 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
887 jffs2_mark_node_obsolete(c, fd->raw);
888 jffs2_free_full_dirent(fd);
/*
 * jffs2_garbage_collect_hole() - write a replacement hole node.
 * @c:     filesystem state.
 * @jeb:   eraseblock being collected (not referenced in the visible lines).
 * @f:     in-core inode that owns the hole.
 * @fn:    existing dnode describing the hole.
 * @start: first byte offset covered by the replacement.
 * @end:   offset just past the last byte covered.
 *
 * For a node that is partially obsoleted by a later write, the old raw
 * inode is read back so it can be rewritten with the same version.  It is
 * validated for node type, totlen equal to sizeof(ri), node CRC and
 * compression type JFFS2_COMPR_ZERO; each failed check logs a warning.
 *
 * Otherwise a fresh header is built: version ++f->highest_version, offset
 * @start, dsize @end - @start, csize 0, JFFS2_COMPR_ZERO, with the current
 * mode, ids, size and times of @f.  Space is reserved and the node is
 * written with jffs2_write_dnode() with no data payload.
 *
 * After the write there are two outcomes:
 *   - the written version equals f->highest_version: the new dnode is
 *     inserted with jffs2_add_full_dnode_to_inode(), and the metadata node
 *     of @f is marked obsolete and freed;
 *   - otherwise the new ref is downgraded with mark_ref_normal() and the
 *     fragtree entries that pointed at @fn are switched to the new dnode.
 * In the second case the old node is finally marked obsolete and freed.
 *
 * Returns PTR_ERR() of the write result if jffs2_write_dnode() fails.
 *
 * NOTE(review): the condition that selects the read-back path, the guard
 * around the f->metadata cleanup, the frag count updates and the other
 * return statements are in lines missing from this listing.  The opener of
 * the block comment about multi-frag nodes is also missing.
 */
892 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
893 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
894 uint32_t start, uint32_t end)
896 struct jffs2_raw_inode ri;
897 struct jffs2_node_frag *frag;
898 struct jffs2_full_dnode *new_fn;
899 uint32_t alloclen, phys_ofs;
902 D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
903 f->inocache->ino, start, end));
905 memset(&ri, 0, sizeof(ri));
910 /* It's partially obsoleted by a later write. So we have to
911 write it out again with the _same_ version as before */
912 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
913 if (readlen != sizeof(ri) || ret) {
914 printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
917 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
918 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
920 je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
923 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
924 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
926 je32_to_cpu(ri.totlen), sizeof(ri));
929 crc = crc32(0, &ri, sizeof(ri)-8);
930 if (crc != je32_to_cpu(ri.node_crc)) {
931 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
933 je32_to_cpu(ri.node_crc), crc);
934 /* FIXME: We could possibly deal with this by writing new holes for each frag */
935 printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
936 start, end, f->inocache->ino);
939 if (ri.compr != JFFS2_COMPR_ZERO) {
940 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
941 printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
942 start, end, f->inocache->ino);
/* Build a brand-new hole header with a new version number. */
947 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
948 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
949 ri.totlen = cpu_to_je32(sizeof(ri));
950 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
952 ri.ino = cpu_to_je32(f->inocache->ino);
953 ri.version = cpu_to_je32(++f->highest_version);
954 ri.offset = cpu_to_je32(start);
955 ri.dsize = cpu_to_je32(end - start);
956 ri.csize = cpu_to_je32(0);
957 ri.compr = JFFS2_COMPR_ZERO;
959 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
960 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
961 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
962 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
963 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
964 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
965 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
966 ri.data_crc = cpu_to_je32(0);
967 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
969 ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen);
971 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
975 new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_GC);
977 if (IS_ERR(new_fn)) {
978 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
979 return PTR_ERR(new_fn);
/* A node written with the newest version can go through the normal insert path. */
981 if (je32_to_cpu(ri.version) == f->highest_version) {
982 jffs2_add_full_dnode_to_inode(c, f, new_fn);
984 jffs2_mark_node_obsolete(c, f->metadata->raw);
985 jffs2_free_full_dnode(f->metadata);
992 * We should only get here in the case where the node we are
993 * replacing had more than one frag, so we kept the same version
994 * number as before. (Except in case of error -- see 'goto fill;'
997 D1(if(unlikely(fn->frags <= 1)) {
998 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
999 fn->frags, je32_to_cpu(ri.version), f->highest_version,
1000 je32_to_cpu(ri.ino));
1003 /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1004 mark_ref_normal(new_fn->raw);
1006 for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1007 frag; frag = frag_next(frag)) {
1008 if (frag->ofs > fn->size + fn->ofs)
1010 if (frag->node == fn) {
1011 frag->node = new_fn;
1017 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1020 if (!new_fn->frags) {
1021 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1025 jffs2_mark_node_obsolete(c, fn->raw);
1026 jffs2_free_full_dnode(fn);
1031 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1032 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1033 uint32_t start, uint32_t end)
1035 struct jffs2_full_dnode *new_fn;
1036 struct jffs2_raw_inode ri;
1037 uint32_t alloclen, phys_ofs, offset, orig_end, orig_start;
1039 unsigned char *comprbuf = NULL, *writebuf;
1041 unsigned char *pg_ptr;
1043 memset(&ri, 0, sizeof(ri));
1045 D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1046 f->inocache->ino, start, end));
1051 if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1052 /* Attempt to do some merging. But only expand to cover logically
1053 adjacent frags if the block containing them is already considered
1054 to be dirty. Otherwise we end up with GC just going round in
1055 circles dirtying the nodes it already wrote out, especially
1056 on NAND where we have small eraseblocks and hence a much higher
1057 chance of nodes having to be split to cross boundaries. */
1059 struct jffs2_node_frag *frag;
1062 min = start & ~(PAGE_CACHE_SIZE-1);
1063 max = min + PAGE_CACHE_SIZE;
1065 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1067 /* BUG_ON(!frag) but that'll happen anyway... */
1069 BUG_ON(frag->ofs != start);
1071 /* First grow down... */
1072 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1074 /* If the previous frag doesn't even reach the beginning, there's
1075 excessive fragmentation. Just merge. */
1076 if (frag->ofs > min) {
1077 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1078 frag->ofs, frag->ofs+frag->size));
1082 /* OK. This frag holds the first byte of the page. */
1083 if (!frag->node || !frag->node->raw) {
1084 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1085 frag->ofs, frag->ofs+frag->size));
1089 /* OK, it's a frag which extends to the beginning of the page. Does it live
1090 in a block which is still considered clean? If so, don't obsolete it.
1091 If not, cover it anyway. */
1093 struct jffs2_raw_node_ref *raw = frag->node->raw;
1094 struct jffs2_eraseblock *jeb;
1096 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1098 if (jeb == c->gcblock) {
1099 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1100 frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1104 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1105 D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1106 frag->ofs, frag->ofs+frag->size, jeb->offset));
1110 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1111 frag->ofs, frag->ofs+frag->size, jeb->offset));
1119 /* Find last frag which is actually part of the node we're to GC. */
1120 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1122 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1124 /* If the next frag doesn't even reach the end of the page, there's lots
1125 of fragmentation. Just merge. */
1126 if (frag->ofs+frag->size < max) {
1127 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1128 frag->ofs, frag->ofs+frag->size));
1129 end = frag->ofs + frag->size;
1133 if (!frag->node || !frag->node->raw) {
1134 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1135 frag->ofs, frag->ofs+frag->size));
1139 /* OK, it's a frag which extends to the end of the page. Does it live
1140 in a block which is still considered clean? If so, don't obsolete it.
1141 If not, cover it anyway. */
1143 struct jffs2_raw_node_ref *raw = frag->node->raw;
1144 struct jffs2_eraseblock *jeb;
1146 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1148 if (jeb == c->gcblock) {
1149 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1150 frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1151 end = frag->ofs + frag->size;
1154 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1155 D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1156 frag->ofs, frag->ofs+frag->size, jeb->offset));
1160 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1161 frag->ofs, frag->ofs+frag->size, jeb->offset));
1162 end = frag->ofs + frag->size;
1166 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1167 orig_start, orig_end, start, end));
1169 BUG_ON(end > JFFS2_F_I_SIZE(f));
1170 BUG_ON(end < orig_end);
1171 BUG_ON(start > orig_start);
1174 /* First, use readpage() to read the appropriate page into the page cache */
1175 /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1176 * triggered garbage collection in the first place?
1177 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1178 * page OK. We'll actually write it out again in commit_write, which is a little
1179 * suboptimal, but at least we're correct.
1181 pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1183 if (IS_ERR(pg_ptr)) {
1184 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1185 return PTR_ERR(pg_ptr);
1189 while(offset < orig_end) {
1192 uint16_t comprtype = JFFS2_COMPR_NONE;
1194 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs, &alloclen);
1197 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1198 sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1201 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1202 datalen = end - offset;
1204 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1206 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1208 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1209 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1210 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1211 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1213 ri.ino = cpu_to_je32(f->inocache->ino);
1214 ri.version = cpu_to_je32(++f->highest_version);
1215 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1216 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1217 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1218 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1219 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1220 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1221 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1222 ri.offset = cpu_to_je32(offset);
1223 ri.csize = cpu_to_je32(cdatalen);
1224 ri.dsize = cpu_to_je32(datalen);
1225 ri.compr = comprtype & 0xff;
1226 ri.usercompr = (comprtype >> 8) & 0xff;
1227 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1228 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1230 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, phys_ofs, ALLOC_GC);
1232 jffs2_free_comprbuf(comprbuf, writebuf);
1234 if (IS_ERR(new_fn)) {
1235 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1236 ret = PTR_ERR(new_fn);
1239 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1242 jffs2_mark_node_obsolete(c, f->metadata->raw);
1243 jffs2_free_full_dnode(f->metadata);
1248 jffs2_gc_release_page(c, pg_ptr, &pg);