]>
Commit | Line | Data |
---|---|---|
1e51764a AB |
1 | /* |
2 | * This file is part of UBIFS. | |
3 | * | |
4 | * Copyright (C) 2006-2008 Nokia Corporation. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify it | |
7 | * under the terms of the GNU General Public License version 2 as published by | |
8 | * the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but WITHOUT | |
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
13 | * more details. | |
14 | * | |
15 | * You should have received a copy of the GNU General Public License along with | |
16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | |
17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
18 | * | |
19 | * Authors: Adrian Hunter | |
20 | * Artem Bityutskiy (Битюцкий Артём) | |
21 | */ | |
22 | ||
23 | /* | |
24 | * This file implements functions that manage the running of the commit process. | |
25 | * Each affected module has its own functions to accomplish their part in the | |
26 | * commit and those functions are called here. | |
27 | * | |
28 | * The commit is the process whereby all updates to the index and LEB properties | |
29 | * are written out together and the journal becomes empty. This keeps the | |
30 | * file system consistent - at all times the state can be recreated by reading | |
31 | * the index and LEB properties and then replaying the journal. | |
32 | * | |
33 | * The commit is split into two parts named "commit start" and "commit end". | |
34 | * During commit start, the commit process has exclusive access to the journal | |
35 | * by holding the commit semaphore down for writing. As few I/O operations as | |
36 | * possible are performed during commit start, instead the nodes that are to be | |
37 | * written are merely identified. During commit end, the commit semaphore is no | |
38 | * longer held and the journal is again in operation, allowing users to continue | |
39 | * to use the file system while the bulk of the commit I/O is performed. The | |
40 | * purpose of this two-step approach is to prevent the commit from causing any | |
41 | * latency blips. Note that in any case, the commit does not prevent lookups | |
42 | * (as permitted by the TNC mutex), or access to VFS data structures e.g. page | |
43 | * cache. | |
44 | */ | |
45 | ||
46 | #include <linux/freezer.h> | |
47 | #include <linux/kthread.h> | |
5a0e3ad6 | 48 | #include <linux/slab.h> |
1e51764a AB |
49 | #include "ubifs.h" |
50 | ||
944fdef5 AB |
51 | /* |
52 | * nothing_to_commit - check if there is nothing to commit. | |
53 | * @c: UBIFS file-system description object | |
54 | * | |
55 | * This is a helper function which checks if there is anything to commit. It is | |
56 | * used as an optimization to avoid starting the commit if it is not really | |
57 | * necessary. Indeed, the commit operation always assumes flash I/O (e.g., | |
58 | * writing the commit start node to the log), and it is better to avoid doing | |
59 | * this unnecessarily. E.g., 'ubifs_sync_fs()' runs the commit, but if there is | |
60 | * nothing to commit, it is more optimal to avoid any flash I/O. | |
61 | * | |
62 | * This function has to be called with @c->commit_sem locked for writing - | |
63 | * this function does not take LPT/TNC locks because the @c->commit_sem | |
64 | * guarantees that we have exclusive access to the TNC and LPT data structures. | |
65 | * | |
66 | * This function returns %1 if there is nothing to commit and %0 otherwise. | |
67 | */ | |
68 | static int nothing_to_commit(struct ubifs_info *c) | |
69 | { | |
70 | /* | |
71 | * During mounting or remounting from R/O mode to R/W mode we may | |
72 | * commit for various recovery-related reasons. | |
73 | */ | |
74 | if (c->mounting || c->remounting_rw) | |
75 | return 0; | |
76 | ||
77 | /* | |
78 | * If the root TNC node is dirty, we definitely have something to | |
79 | * commit. | |
80 | */ | |
f42eed7c | 81 | if (c->zroot.znode && ubifs_zn_dirty(c->zroot.znode)) |
944fdef5 AB |
82 | return 0; |
83 | ||
84 | /* | |
85 | * Even though the TNC is clean, the LPT tree may have dirty nodes. For | |
86 | * example, this may happen if the budgeting subsystem invoked GC to | |
87 | * make some free space, and the GC found an LEB with only dirty and | |
88 | * free space. In this case GC would just change the lprops of this | |
89 | * LEB (by turning all space into free space) and unmap it. | |
90 | */ | |
91 | if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags)) | |
92 | return 0; | |
93 | ||
6eb61d58 RW |
94 | ubifs_assert(c, atomic_long_read(&c->dirty_zn_cnt) == 0); |
95 | ubifs_assert(c, c->dirty_pn_cnt == 0); | |
96 | ubifs_assert(c, c->dirty_nn_cnt == 0); | |
944fdef5 AB |
97 | |
98 | return 1; | |
99 | } | |
100 | ||
1e51764a AB |
101 | /** |
102 | * do_commit - commit the journal. | |
103 | * @c: UBIFS file-system description object | |
104 | * | |
105 | * This function implements UBIFS commit. It has to be called with commit lock | |
106 | * locked. Returns zero in case of success and a negative error code in case of | |
107 | * failure. | |
108 | */ | |
109 | static int do_commit(struct ubifs_info *c) | |
110 | { | |
111 | int err, new_ltail_lnum, old_ltail_lnum, i; | |
112 | struct ubifs_zbranch zroot; | |
113 | struct ubifs_lp_stats lst; | |
114 | ||
115 | dbg_cmt("start"); | |
6eb61d58 | 116 | ubifs_assert(c, !c->ro_media && !c->ro_mount); |
2680d722 AB |
117 | |
118 | if (c->ro_error) { | |
1e51764a AB |
119 | err = -EROFS; |
120 | goto out_up; | |
121 | } | |
122 | ||
944fdef5 AB |
123 | if (nothing_to_commit(c)) { |
124 | up_write(&c->commit_sem); | |
125 | err = 0; | |
126 | goto out_cancel; | |
127 | } | |
128 | ||
1e51764a AB |
129 | /* Sync all write buffers (necessary for recovery) */ |
130 | for (i = 0; i < c->jhead_cnt; i++) { | |
131 | err = ubifs_wbuf_sync(&c->jheads[i].wbuf); | |
132 | if (err) | |
133 | goto out_up; | |
134 | } | |
135 | ||
014eb04b | 136 | c->cmt_no += 1; |
1e51764a AB |
137 | err = ubifs_gc_start_commit(c); |
138 | if (err) | |
139 | goto out_up; | |
140 | err = dbg_check_lprops(c); | |
141 | if (err) | |
142 | goto out_up; | |
143 | err = ubifs_log_start_commit(c, &new_ltail_lnum); | |
144 | if (err) | |
145 | goto out_up; | |
146 | err = ubifs_tnc_start_commit(c, &zroot); | |
147 | if (err) | |
148 | goto out_up; | |
149 | err = ubifs_lpt_start_commit(c); | |
150 | if (err) | |
151 | goto out_up; | |
152 | err = ubifs_orphan_start_commit(c); | |
153 | if (err) | |
154 | goto out_up; | |
155 | ||
156 | ubifs_get_lp_stats(c, &lst); | |
157 | ||
158 | up_write(&c->commit_sem); | |
159 | ||
160 | err = ubifs_tnc_end_commit(c); | |
161 | if (err) | |
162 | goto out; | |
163 | err = ubifs_lpt_end_commit(c); | |
164 | if (err) | |
165 | goto out; | |
166 | err = ubifs_orphan_end_commit(c); | |
1e51764a AB |
167 | if (err) |
168 | goto out; | |
169 | err = dbg_check_old_index(c, &zroot); | |
170 | if (err) | |
171 | goto out; | |
172 | ||
014eb04b | 173 | c->mst_node->cmt_no = cpu_to_le64(c->cmt_no); |
1e51764a AB |
174 | c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); |
175 | c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); | |
176 | c->mst_node->root_offs = cpu_to_le32(zroot.offs); | |
177 | c->mst_node->root_len = cpu_to_le32(zroot.len); | |
178 | c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); | |
179 | c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); | |
b137545c | 180 | c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz); |
1e51764a AB |
181 | c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); |
182 | c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); | |
183 | c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); | |
184 | c->mst_node->nhead_offs = cpu_to_le32(c->nhead_offs); | |
185 | c->mst_node->ltab_lnum = cpu_to_le32(c->ltab_lnum); | |
186 | c->mst_node->ltab_offs = cpu_to_le32(c->ltab_offs); | |
187 | c->mst_node->lsave_lnum = cpu_to_le32(c->lsave_lnum); | |
188 | c->mst_node->lsave_offs = cpu_to_le32(c->lsave_offs); | |
189 | c->mst_node->lscan_lnum = cpu_to_le32(c->lscan_lnum); | |
190 | c->mst_node->empty_lebs = cpu_to_le32(lst.empty_lebs); | |
191 | c->mst_node->idx_lebs = cpu_to_le32(lst.idx_lebs); | |
192 | c->mst_node->total_free = cpu_to_le64(lst.total_free); | |
193 | c->mst_node->total_dirty = cpu_to_le64(lst.total_dirty); | |
194 | c->mst_node->total_used = cpu_to_le64(lst.total_used); | |
195 | c->mst_node->total_dead = cpu_to_le64(lst.total_dead); | |
196 | c->mst_node->total_dark = cpu_to_le64(lst.total_dark); | |
197 | if (c->no_orphs) | |
198 | c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); | |
199 | else | |
200 | c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS); | |
052c2807 AB |
201 | |
202 | old_ltail_lnum = c->ltail_lnum; | |
203 | err = ubifs_log_end_commit(c, new_ltail_lnum); | |
1e51764a AB |
204 | if (err) |
205 | goto out; | |
206 | ||
207 | err = ubifs_log_post_commit(c, old_ltail_lnum); | |
208 | if (err) | |
209 | goto out; | |
210 | err = ubifs_gc_end_commit(c); | |
211 | if (err) | |
212 | goto out; | |
213 | err = ubifs_lpt_post_commit(c); | |
214 | if (err) | |
215 | goto out; | |
216 | ||
944fdef5 | 217 | out_cancel: |
1e51764a AB |
218 | spin_lock(&c->cs_lock); |
219 | c->cmt_state = COMMIT_RESTING; | |
220 | wake_up(&c->cmt_wq); | |
221 | dbg_cmt("commit end"); | |
222 | spin_unlock(&c->cs_lock); | |
1e51764a AB |
223 | return 0; |
224 | ||
225 | out_up: | |
226 | up_write(&c->commit_sem); | |
227 | out: | |
235c362b | 228 | ubifs_err(c, "commit failed, error %d", err); |
1e51764a AB |
229 | spin_lock(&c->cs_lock); |
230 | c->cmt_state = COMMIT_BROKEN; | |
231 | wake_up(&c->cmt_wq); | |
232 | spin_unlock(&c->cs_lock); | |
233 | ubifs_ro_mode(c, err); | |
234 | return err; | |
235 | } | |
236 | ||
237 | /** | |
238 | * run_bg_commit - run background commit if it is needed. | |
239 | * @c: UBIFS file-system description object | |
240 | * | |
241 | * This function runs background commit if it is needed. Returns zero in case | |
242 | * of success and a negative error code in case of failure. | |
243 | */ | |
244 | static int run_bg_commit(struct ubifs_info *c) | |
245 | { | |
246 | spin_lock(&c->cs_lock); | |
247 | /* | |
248 | * Run background commit only if background commit was requested or if | |
249 | * commit is required. | |
250 | */ | |
251 | if (c->cmt_state != COMMIT_BACKGROUND && | |
252 | c->cmt_state != COMMIT_REQUIRED) | |
253 | goto out; | |
254 | spin_unlock(&c->cs_lock); | |
255 | ||
256 | down_write(&c->commit_sem); | |
257 | spin_lock(&c->cs_lock); | |
258 | if (c->cmt_state == COMMIT_REQUIRED) | |
259 | c->cmt_state = COMMIT_RUNNING_REQUIRED; | |
260 | else if (c->cmt_state == COMMIT_BACKGROUND) | |
261 | c->cmt_state = COMMIT_RUNNING_BACKGROUND; | |
262 | else | |
263 | goto out_cmt_unlock; | |
264 | spin_unlock(&c->cs_lock); | |
265 | ||
266 | return do_commit(c); | |
267 | ||
268 | out_cmt_unlock: | |
269 | up_write(&c->commit_sem); | |
270 | out: | |
271 | spin_unlock(&c->cs_lock); | |
272 | return 0; | |
273 | } | |
274 | ||
275 | /** | |
276 | * ubifs_bg_thread - UBIFS background thread function. | |
277 | * @info: points to the file-system description object | |
278 | * | |
279 | * This function implements various file-system background activities: | |
280 | * o when a write-buffer timer expires it synchronizes the appropriate | |
281 | * write-buffer; | |
282 | * o when the journal is about to be full, it starts in-advance commit. | |
283 | * | |
284 | * Note, other stuff like background garbage collection may be added here in | |
285 | * future. | |
286 | */ | |
287 | int ubifs_bg_thread(void *info) | |
288 | { | |
289 | int err; | |
290 | struct ubifs_info *c = info; | |
291 | ||
235c362b | 292 | ubifs_msg(c, "background thread \"%s\" started, PID %d", |
3668b70f | 293 | c->bgt_name, current->pid); |
1e51764a AB |
294 | set_freezable(); |
295 | ||
296 | while (1) { | |
297 | if (kthread_should_stop()) | |
298 | break; | |
299 | ||
300 | if (try_to_freeze()) | |
301 | continue; | |
302 | ||
303 | set_current_state(TASK_INTERRUPTIBLE); | |
304 | /* Check if there is something to do */ | |
305 | if (!c->need_bgt) { | |
306 | /* | |
307 | * Nothing prevents us from going sleep now and | |
308 | * be never woken up and block the task which | |
309 | * could wait in 'kthread_stop()' forever. | |
310 | */ | |
311 | if (kthread_should_stop()) | |
312 | break; | |
313 | schedule(); | |
314 | continue; | |
315 | } else | |
316 | __set_current_state(TASK_RUNNING); | |
317 | ||
318 | c->need_bgt = 0; | |
319 | err = ubifs_bg_wbufs_sync(c); | |
320 | if (err) | |
321 | ubifs_ro_mode(c, err); | |
322 | ||
323 | run_bg_commit(c); | |
324 | cond_resched(); | |
325 | } | |
326 | ||
235c362b | 327 | ubifs_msg(c, "background thread \"%s\" stops", c->bgt_name); |
1e51764a AB |
328 | return 0; |
329 | } | |
330 | ||
331 | /** | |
332 | * ubifs_commit_required - set commit state to "required". | |
333 | * @c: UBIFS file-system description object | |
334 | * | |
335 | * This function is called if a commit is required but cannot be done from the | |
336 | * calling function, so it is just flagged instead. | |
337 | */ | |
338 | void ubifs_commit_required(struct ubifs_info *c) | |
339 | { | |
340 | spin_lock(&c->cs_lock); | |
341 | switch (c->cmt_state) { | |
342 | case COMMIT_RESTING: | |
343 | case COMMIT_BACKGROUND: | |
344 | dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), | |
345 | dbg_cstate(COMMIT_REQUIRED)); | |
346 | c->cmt_state = COMMIT_REQUIRED; | |
347 | break; | |
348 | case COMMIT_RUNNING_BACKGROUND: | |
349 | dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), | |
350 | dbg_cstate(COMMIT_RUNNING_REQUIRED)); | |
351 | c->cmt_state = COMMIT_RUNNING_REQUIRED; | |
352 | break; | |
353 | case COMMIT_REQUIRED: | |
354 | case COMMIT_RUNNING_REQUIRED: | |
355 | case COMMIT_BROKEN: | |
356 | break; | |
357 | } | |
358 | spin_unlock(&c->cs_lock); | |
359 | } | |
360 | ||
361 | /** | |
362 | * ubifs_request_bg_commit - notify the background thread to do a commit. | |
363 | * @c: UBIFS file-system description object | |
364 | * | |
365 | * This function is called if the journal is full enough to make a commit | |
366 | * worthwhile, so background thread is kicked to start it. | |
367 | */ | |
368 | void ubifs_request_bg_commit(struct ubifs_info *c) | |
369 | { | |
370 | spin_lock(&c->cs_lock); | |
371 | if (c->cmt_state == COMMIT_RESTING) { | |
372 | dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), | |
373 | dbg_cstate(COMMIT_BACKGROUND)); | |
374 | c->cmt_state = COMMIT_BACKGROUND; | |
375 | spin_unlock(&c->cs_lock); | |
376 | ubifs_wake_up_bgt(c); | |
377 | } else | |
378 | spin_unlock(&c->cs_lock); | |
379 | } | |
380 | ||
381 | /** | |
382 | * wait_for_commit - wait for commit. | |
383 | * @c: UBIFS file-system description object | |
384 | * | |
385 | * This function sleeps until the commit operation is no longer running. | |
386 | */ | |
387 | static int wait_for_commit(struct ubifs_info *c) | |
388 | { | |
389 | dbg_cmt("pid %d goes sleep", current->pid); | |
390 | ||
391 | /* | |
392 | * The following sleeps if the condition is false, and will be woken | |
393 | * when the commit ends. It is possible, although very unlikely, that we | |
394 | * will wake up and see the subsequent commit running, rather than the | |
395 | * one we were waiting for, and go back to sleep. However, we will be | |
396 | * woken again, so there is no danger of sleeping forever. | |
397 | */ | |
398 | wait_event(c->cmt_wq, c->cmt_state != COMMIT_RUNNING_BACKGROUND && | |
399 | c->cmt_state != COMMIT_RUNNING_REQUIRED); | |
400 | dbg_cmt("commit finished, pid %d woke up", current->pid); | |
401 | return 0; | |
402 | } | |
403 | ||
404 | /** | |
405 | * ubifs_run_commit - run or wait for commit. | |
406 | * @c: UBIFS file-system description object | |
407 | * | |
408 | * This function runs commit and returns zero in case of success and a negative | |
409 | * error code in case of failure. | |
410 | */ | |
411 | int ubifs_run_commit(struct ubifs_info *c) | |
412 | { | |
413 | int err = 0; | |
414 | ||
415 | spin_lock(&c->cs_lock); | |
416 | if (c->cmt_state == COMMIT_BROKEN) { | |
549c999a | 417 | err = -EROFS; |
1e51764a AB |
418 | goto out; |
419 | } | |
420 | ||
421 | if (c->cmt_state == COMMIT_RUNNING_BACKGROUND) | |
422 | /* | |
423 | * We set the commit state to 'running required' to indicate | |
424 | * that we want it to complete as quickly as possible. | |
425 | */ | |
426 | c->cmt_state = COMMIT_RUNNING_REQUIRED; | |
427 | ||
428 | if (c->cmt_state == COMMIT_RUNNING_REQUIRED) { | |
429 | spin_unlock(&c->cs_lock); | |
430 | return wait_for_commit(c); | |
431 | } | |
432 | spin_unlock(&c->cs_lock); | |
433 | ||
434 | /* Ok, the commit is indeed needed */ | |
435 | ||
436 | down_write(&c->commit_sem); | |
437 | spin_lock(&c->cs_lock); | |
438 | /* | |
439 | * Since we unlocked 'c->cs_lock', the state may have changed, so | |
440 | * re-check it. | |
441 | */ | |
442 | if (c->cmt_state == COMMIT_BROKEN) { | |
549c999a | 443 | err = -EROFS; |
1e51764a AB |
444 | goto out_cmt_unlock; |
445 | } | |
446 | ||
447 | if (c->cmt_state == COMMIT_RUNNING_BACKGROUND) | |
448 | c->cmt_state = COMMIT_RUNNING_REQUIRED; | |
449 | ||
450 | if (c->cmt_state == COMMIT_RUNNING_REQUIRED) { | |
451 | up_write(&c->commit_sem); | |
452 | spin_unlock(&c->cs_lock); | |
453 | return wait_for_commit(c); | |
454 | } | |
455 | c->cmt_state = COMMIT_RUNNING_REQUIRED; | |
456 | spin_unlock(&c->cs_lock); | |
457 | ||
458 | err = do_commit(c); | |
459 | return err; | |
460 | ||
461 | out_cmt_unlock: | |
462 | up_write(&c->commit_sem); | |
463 | out: | |
464 | spin_unlock(&c->cs_lock); | |
465 | return err; | |
466 | } | |
467 | ||
468 | /** | |
469 | * ubifs_gc_should_commit - determine if it is time for GC to run commit. | |
470 | * @c: UBIFS file-system description object | |
471 | * | |
472 | * This function is called by garbage collection to determine if commit should | |
473 | * be run. If commit state is @COMMIT_BACKGROUND, which means that the journal | |
474 | * is full enough to start commit, this function returns true. It is not | |
475 | * absolutely necessary to commit yet, but it feels like this should be better | |
476 | * then to keep doing GC. This function returns %1 if GC has to initiate commit | |
477 | * and %0 if not. | |
478 | */ | |
479 | int ubifs_gc_should_commit(struct ubifs_info *c) | |
480 | { | |
481 | int ret = 0; | |
482 | ||
483 | spin_lock(&c->cs_lock); | |
484 | if (c->cmt_state == COMMIT_BACKGROUND) { | |
485 | dbg_cmt("commit required now"); | |
486 | c->cmt_state = COMMIT_REQUIRED; | |
487 | } else | |
488 | dbg_cmt("commit not requested"); | |
489 | if (c->cmt_state == COMMIT_REQUIRED) | |
490 | ret = 1; | |
491 | spin_unlock(&c->cs_lock); | |
492 | return ret; | |
493 | } | |
494 | ||
f70b7e52 AB |
495 | /* |
496 | * Everything below is related to debugging. | |
497 | */ | |
1e51764a AB |
498 | |
499 | /** | |
500 | * struct idx_node - hold index nodes during index tree traversal. | |
501 | * @list: list | |
502 | * @iip: index in parent (slot number of this indexing node in the parent | |
503 | * indexing node) | |
504 | * @upper_key: all keys in this indexing node have to be less or equivalent to | |
505 | * this key | |
506 | * @idx: index node (8-byte aligned because all node structures must be 8-byte | |
507 | * aligned) | |
508 | */ | |
509 | struct idx_node { | |
510 | struct list_head list; | |
511 | int iip; | |
512 | union ubifs_key upper_key; | |
43457c60 | 513 | struct ubifs_idx_node idx __aligned(8); |
1e51764a AB |
514 | }; |
515 | ||
516 | /** | |
517 | * dbg_old_index_check_init - get information for the next old index check. | |
518 | * @c: UBIFS file-system description object | |
519 | * @zroot: root of the index | |
520 | * | |
521 | * This function records information about the index that will be needed for the | |
522 | * next old index check i.e. 'dbg_check_old_index()'. | |
523 | * | |
524 | * This function returns %0 on success and a negative error code on failure. | |
525 | */ | |
526 | int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot) | |
527 | { | |
528 | struct ubifs_idx_node *idx; | |
529 | int lnum, offs, len, err = 0; | |
17c2f9f8 | 530 | struct ubifs_debug_info *d = c->dbg; |
1e51764a | 531 | |
17c2f9f8 AB |
532 | d->old_zroot = *zroot; |
533 | lnum = d->old_zroot.lnum; | |
534 | offs = d->old_zroot.offs; | |
535 | len = d->old_zroot.len; | |
1e51764a AB |
536 | |
537 | idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); | |
538 | if (!idx) | |
539 | return -ENOMEM; | |
540 | ||
541 | err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); | |
542 | if (err) | |
543 | goto out; | |
544 | ||
17c2f9f8 AB |
545 | d->old_zroot_level = le16_to_cpu(idx->level); |
546 | d->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum); | |
1e51764a AB |
547 | out: |
548 | kfree(idx); | |
549 | return err; | |
550 | } | |
551 | ||
552 | /** | |
553 | * dbg_check_old_index - check the old copy of the index. | |
554 | * @c: UBIFS file-system description object | |
555 | * @zroot: root of the new index | |
556 | * | |
557 | * In order to be able to recover from an unclean unmount, a complete copy of | |
558 | * the index must exist on flash. This is the "old" index. The commit process | |
559 | * must write the "new" index to flash without overwriting or destroying any | |
560 | * part of the old index. This function is run at commit end in order to check | |
561 | * that the old index does indeed exist completely intact. | |
562 | * | |
563 | * This function returns %0 on success and a negative error code on failure. | |
564 | */ | |
565 | int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) | |
566 | { | |
567 | int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; | |
568 | int first = 1, iip; | |
17c2f9f8 | 569 | struct ubifs_debug_info *d = c->dbg; |
83ef2ecd | 570 | union ubifs_key uninitialized_var(lower_key), upper_key, l_key, u_key; |
1e51764a AB |
571 | unsigned long long uninitialized_var(last_sqnum); |
572 | struct ubifs_idx_node *idx; | |
573 | struct list_head list; | |
574 | struct idx_node *i; | |
575 | size_t sz; | |
576 | ||
8d7819b4 | 577 | if (!dbg_is_chk_index(c)) |
8b229c76 | 578 | return 0; |
1e51764a AB |
579 | |
580 | INIT_LIST_HEAD(&list); | |
581 | ||
582 | sz = sizeof(struct idx_node) + ubifs_idx_node_sz(c, c->fanout) - | |
583 | UBIFS_IDX_NODE_SZ; | |
584 | ||
585 | /* Start at the old zroot */ | |
17c2f9f8 AB |
586 | lnum = d->old_zroot.lnum; |
587 | offs = d->old_zroot.offs; | |
588 | len = d->old_zroot.len; | |
1e51764a AB |
589 | iip = 0; |
590 | ||
591 | /* | |
592 | * Traverse the index tree preorder depth-first i.e. do a node and then | |
593 | * its subtrees from left to right. | |
594 | */ | |
595 | while (1) { | |
596 | struct ubifs_branch *br; | |
597 | ||
598 | /* Get the next index node */ | |
599 | i = kmalloc(sz, GFP_NOFS); | |
600 | if (!i) { | |
601 | err = -ENOMEM; | |
602 | goto out_free; | |
603 | } | |
604 | i->iip = iip; | |
605 | /* Keep the index nodes on our path in a linked list */ | |
606 | list_add_tail(&i->list, &list); | |
607 | /* Read the index node */ | |
608 | idx = &i->idx; | |
609 | err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); | |
610 | if (err) | |
611 | goto out_free; | |
612 | /* Validate index node */ | |
613 | child_cnt = le16_to_cpu(idx->child_cnt); | |
614 | if (child_cnt < 1 || child_cnt > c->fanout) { | |
615 | err = 1; | |
616 | goto out_dump; | |
617 | } | |
618 | if (first) { | |
619 | first = 0; | |
620 | /* Check root level and sqnum */ | |
17c2f9f8 | 621 | if (le16_to_cpu(idx->level) != d->old_zroot_level) { |
1e51764a AB |
622 | err = 2; |
623 | goto out_dump; | |
624 | } | |
17c2f9f8 | 625 | if (le64_to_cpu(idx->ch.sqnum) != d->old_zroot_sqnum) { |
1e51764a AB |
626 | err = 3; |
627 | goto out_dump; | |
628 | } | |
629 | /* Set last values as though root had a parent */ | |
630 | last_level = le16_to_cpu(idx->level) + 1; | |
631 | last_sqnum = le64_to_cpu(idx->ch.sqnum) + 1; | |
632 | key_read(c, ubifs_idx_key(c, idx), &lower_key); | |
633 | highest_ino_key(c, &upper_key, INUM_WATERMARK); | |
634 | } | |
635 | key_copy(c, &upper_key, &i->upper_key); | |
636 | if (le16_to_cpu(idx->level) != last_level - 1) { | |
637 | err = 3; | |
638 | goto out_dump; | |
639 | } | |
640 | /* | |
641 | * The index is always written bottom up hence a child's sqnum | |
642 | * is always less than the parents. | |
643 | */ | |
644 | if (le64_to_cpu(idx->ch.sqnum) >= last_sqnum) { | |
645 | err = 4; | |
646 | goto out_dump; | |
647 | } | |
648 | /* Check key range */ | |
649 | key_read(c, ubifs_idx_key(c, idx), &l_key); | |
650 | br = ubifs_idx_branch(c, idx, child_cnt - 1); | |
651 | key_read(c, &br->key, &u_key); | |
652 | if (keys_cmp(c, &lower_key, &l_key) > 0) { | |
653 | err = 5; | |
654 | goto out_dump; | |
655 | } | |
656 | if (keys_cmp(c, &upper_key, &u_key) < 0) { | |
657 | err = 6; | |
658 | goto out_dump; | |
659 | } | |
660 | if (keys_cmp(c, &upper_key, &u_key) == 0) | |
661 | if (!is_hash_key(c, &u_key)) { | |
662 | err = 7; | |
663 | goto out_dump; | |
664 | } | |
665 | /* Go to next index node */ | |
666 | if (le16_to_cpu(idx->level) == 0) { | |
667 | /* At the bottom, so go up until can go right */ | |
668 | while (1) { | |
669 | /* Drop the bottom of the list */ | |
670 | list_del(&i->list); | |
671 | kfree(i); | |
672 | /* No more list means we are done */ | |
673 | if (list_empty(&list)) | |
674 | goto out; | |
675 | /* Look at the new bottom */ | |
676 | i = list_entry(list.prev, struct idx_node, | |
677 | list); | |
678 | idx = &i->idx; | |
679 | /* Can we go right */ | |
680 | if (iip + 1 < le16_to_cpu(idx->child_cnt)) { | |
681 | iip = iip + 1; | |
682 | break; | |
683 | } else | |
684 | /* Nope, so go up again */ | |
685 | iip = i->iip; | |
686 | } | |
687 | } else | |
688 | /* Go down left */ | |
689 | iip = 0; | |
690 | /* | |
691 | * We have the parent in 'idx' and now we set up for reading the | |
692 | * child pointed to by slot 'iip'. | |
693 | */ | |
694 | last_level = le16_to_cpu(idx->level); | |
695 | last_sqnum = le64_to_cpu(idx->ch.sqnum); | |
696 | br = ubifs_idx_branch(c, idx, iip); | |
697 | lnum = le32_to_cpu(br->lnum); | |
698 | offs = le32_to_cpu(br->offs); | |
699 | len = le32_to_cpu(br->len); | |
700 | key_read(c, &br->key, &lower_key); | |
701 | if (iip + 1 < le16_to_cpu(idx->child_cnt)) { | |
702 | br = ubifs_idx_branch(c, idx, iip + 1); | |
703 | key_read(c, &br->key, &upper_key); | |
704 | } else | |
705 | key_copy(c, &i->upper_key, &upper_key); | |
706 | } | |
707 | out: | |
708 | err = dbg_old_index_check_init(c, zroot); | |
709 | if (err) | |
710 | goto out_free; | |
711 | ||
712 | return 0; | |
713 | ||
714 | out_dump: | |
235c362b | 715 | ubifs_err(c, "dumping index node (iip=%d)", i->iip); |
edf6be24 | 716 | ubifs_dump_node(c, idx); |
1e51764a AB |
717 | list_del(&i->list); |
718 | kfree(i); | |
719 | if (!list_empty(&list)) { | |
720 | i = list_entry(list.prev, struct idx_node, list); | |
235c362b | 721 | ubifs_err(c, "dumping parent index node"); |
edf6be24 | 722 | ubifs_dump_node(c, &i->idx); |
1e51764a AB |
723 | } |
724 | out_free: | |
725 | while (!list_empty(&list)) { | |
726 | i = list_entry(list.next, struct idx_node, list); | |
727 | list_del(&i->list); | |
728 | kfree(i); | |
729 | } | |
235c362b | 730 | ubifs_err(c, "failed, error %d", err); |
1e51764a AB |
731 | if (err > 0) |
732 | err = -EINVAL; | |
733 | return err; | |
734 | } |