2 * linux/fs/jbd/commit.c
6 * Copyright 1998 Red Hat corp --- All Rights Reserved
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
12 * Journal commit routines for the generic filesystem journaling code;
13 * part of the ext2fs journaling system.
16 #include <linux/time.h>
18 #include <linux/jbd.h>
19 #include <linux/errno.h>
21 #include <linux/pagemap.h>
22 #include <linux/bio.h>
23 #include <linux/blkdev.h>
24 #include <trace/events/jbd.h>
27 * Default IO end handler for temporary BJ_IO buffer_heads.
29 static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
33 set_buffer_uptodate(bh);
35 clear_buffer_uptodate(bh);
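/*
 * Note: this handler only records success or failure in the uptodate bit;
 * the commit code below detects write errors by testing buffer_uptodate()
 * once the temporary buffer has been unlocked.
 */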
40 * When an ext3-ordered file is truncated, it is possible that many pages are
41 * not successfully freed, because they are attached to a committing transaction.
42 * After the transaction commits, these pages are left on the LRU, with no
43 * ->mapping, and with attached buffers. These pages are trivially reclaimable
44 * by the VM, but their apparent absence upsets the VM accounting, and it makes
45 * the numbers in /proc/meminfo look odd.
47 * So here, we have a buffer which has just come off the forget list. Look to
48 * see if we can strip all buffers from the backing page.
50 * Called under journal->j_list_lock. The caller provided us with a ref
51 * against the buffer, and we drop that here.
53 static void release_buffer_page(struct buffer_head *bh)
59 if (atomic_read(&bh->b_count) != 1)
67 /* OK, it's a truncated page */
68 if (!trylock_page(page))
73 try_to_free_buffers(page);
75 page_cache_release(page);
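/*
 * At this point try_to_free_buffers() has stripped the buffers from the
 * truncated page and page_cache_release() drops our page reference, so the
 * page can be reclaimed normally and the /proc/meminfo accounting mentioned
 * above stays sane.
 */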
83 * Decrement reference counter for data buffer. If it has been marked
84 * 'BH_Freed', release it and the page to which it belongs if possible.
86 static void release_data_buffer(struct buffer_head *bh)
88 if (buffer_freed(bh)) {
89 clear_buffer_freed(bh);
90 release_buffer_page(bh);
96 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
* held. For lock-ordering ("ranking") reasons we must trylock. If we lose, schedule away and
98 * return 0. j_list_lock is dropped in this case.
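/*
 * A sketch of the usage pattern, as seen at the call sites below: callers
 * hold j_list_lock while walking a buffer list and must retake both locks,
 * in the correct order, whenever the trylock fails:
 *
 *	if (!inverted_lock(journal, bh)) {
 *		jbd_lock_bh_state(bh);
 *		spin_lock(&journal->j_list_lock);
 *	}
 */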
100 static int inverted_lock(journal_t *journal, struct buffer_head *bh)
102 if (!jbd_trylock_bh_state(bh)) {
103 spin_unlock(&journal->j_list_lock);
110 /* Done it all: now write the commit record. We should have
111 * cleaned up our previous buffers by now, so if we are in abort
* mode we can now just skip the rest of the journal write entirely.
115 * Returns 1 if the journal needs to be aborted or 0 on success
117 static int journal_write_commit_record(journal_t *journal,
118 transaction_t *commit_transaction)
120 struct journal_head *descriptor;
121 struct buffer_head *bh;
122 journal_header_t *header;
125 if (is_journal_aborted(journal))
128 descriptor = journal_get_descriptor_buffer(journal);
132 bh = jh2bh(descriptor);
134 header = (journal_header_t *)(bh->b_data);
135 header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
136 header->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
137 header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
139 JBUFFER_TRACE(descriptor, "write commit block");
140 set_buffer_dirty(bh);
142 if (journal->j_flags & JFS_BARRIER)
143 ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_FLUSH_FUA);
145 ret = sync_dirty_buffer(bh);
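/*
 * With JFS_BARRIER set, WRITE_FLUSH_FUA asks the block layer to flush the
 * previously written journal blocks to stable storage and to write the
 * commit block itself with forced unit access, so the commit record cannot
 * reach the media ahead of the blocks it commits.
 */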
147 put_bh(bh); /* One for getblk() */
148 journal_put_journal_head(descriptor);
150 return (ret == -EIO);
153 static void journal_do_submit_data(struct buffer_head **wbuf, int bufs,
158 for (i = 0; i < bufs; i++) {
159 wbuf[i]->b_end_io = end_buffer_write_sync;
/* We use up our safety reference in submit_bh() */
161 submit_bh(write_op, wbuf[i]);
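/*
 * end_buffer_write_sync() will mark each buffer uptodate (or not) and unlock
 * it when the IO completes; the wait loop over t_locked_list in
 * journal_commit_transaction() picks the result up from there.
 */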
166 * Submit all the data buffers to disk
168 static int journal_submit_data_buffers(journal_t *journal,
169 transaction_t *commit_transaction,
172 struct journal_head *jh;
173 struct buffer_head *bh;
176 struct buffer_head **wbuf = journal->j_wbuf;
180 * Whenever we unlock the journal and sleep, things can get added
181 * onto ->t_sync_datalist, so we have to keep looping back to
182 * write_out_data until we *know* that the list is empty.
* Clean up any flushed data buffers from the data list. Even in
185 * abort mode, we want to flush this out as soon as possible.
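/*
 * In outline, each buffer on t_sync_datalist is handled in one of three ways
 * below: a dirty buffer is locked and queued in wbuf[] for writeout, a
 * buffer already under IO is moved to the BJ_Locked list so we can wait for
 * it later, and a clean, unlocked buffer is simply unfiled and released.
 */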
189 spin_lock(&journal->j_list_lock);
191 while (commit_transaction->t_sync_datalist) {
192 jh = commit_transaction->t_sync_datalist;
196 /* Get reference just to make sure buffer does not disappear
197 * when we are forced to drop various locks */
199 /* If the buffer is dirty, we need to submit IO and hence
200 * we need the buffer lock. We try to lock the buffer without
201 * blocking. If we fail, we need to drop j_list_lock and do
202 * blocking lock_buffer().
204 if (buffer_dirty(bh)) {
205 if (!trylock_buffer(bh)) {
206 BUFFER_TRACE(bh, "needs blocking lock");
207 spin_unlock(&journal->j_list_lock);
trace_jbd_do_submit_data(journal, commit_transaction);
210 /* Write out all data to prevent deadlocks */
211 journal_do_submit_data(wbuf, bufs, write_op);
214 spin_lock(&journal->j_list_lock);
218 /* We have to get bh_state lock. Again out of order, sigh. */
219 if (!inverted_lock(journal, bh)) {
220 jbd_lock_bh_state(bh);
221 spin_lock(&journal->j_list_lock);
223 /* Someone already cleaned up the buffer? */
224 if (!buffer_jbd(bh) || bh2jh(bh) != jh
225 || jh->b_transaction != commit_transaction
226 || jh->b_jlist != BJ_SyncData) {
227 jbd_unlock_bh_state(bh);
230 BUFFER_TRACE(bh, "already cleaned up");
231 release_data_buffer(bh);
234 if (locked && test_clear_buffer_dirty(bh)) {
235 BUFFER_TRACE(bh, "needs writeout, adding to array");
__journal_file_buffer(jh, commit_transaction, BJ_Locked);
239 jbd_unlock_bh_state(bh);
240 if (bufs == journal->j_wbufsize) {
241 spin_unlock(&journal->j_list_lock);
trace_jbd_do_submit_data(journal, commit_transaction);
244 journal_do_submit_data(wbuf, bufs, write_op);
248 } else if (!locked && buffer_locked(bh)) {
__journal_file_buffer(jh, commit_transaction, BJ_Locked);
251 jbd_unlock_bh_state(bh);
254 BUFFER_TRACE(bh, "writeout complete: unfile");
255 if (unlikely(!buffer_uptodate(bh)))
257 __journal_unfile_buffer(jh);
258 jbd_unlock_bh_state(bh);
261 release_data_buffer(bh);
264 if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
265 spin_unlock(&journal->j_list_lock);
269 spin_unlock(&journal->j_list_lock);
270 trace_jbd_do_submit_data(journal, commit_transaction);
271 journal_do_submit_data(wbuf, bufs, write_op);
277 * journal_commit_transaction
279 * The primary function for committing a transaction to the log. This
280 * function is called by the journal thread to begin a complete commit.
282 void journal_commit_transaction(journal_t *journal)
284 transaction_t *commit_transaction;
285 struct journal_head *jh, *new_jh, *descriptor;
286 struct buffer_head **wbuf = journal->j_wbuf;
290 unsigned int blocknr;
294 journal_header_t *header;
295 journal_block_tag_t *tag = NULL;
300 struct blk_plug plug;
301 int write_op = WRITE;
304 * First job: lock down the current transaction and wait for
305 * all outstanding updates to complete.
308 /* Do we need to erase the effects of a prior journal_flush? */
309 if (journal->j_flags & JFS_FLUSHED) {
310 jbd_debug(3, "super block updated\n");
311 mutex_lock(&journal->j_checkpoint_mutex);
313 * We hold j_checkpoint_mutex so tail cannot change under us.
314 * We don't need any special data guarantees for writing sb
* since the journal is empty and it is OK for the write to be
* flushed only with the transaction commit.
318 journal_update_sb_log_tail(journal, journal->j_tail_sequence,
319 journal->j_tail, WRITE_SYNC);
320 mutex_unlock(&journal->j_checkpoint_mutex);
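/*
 * A prior journal_flush() left the journal empty and set JFS_FLUSHED; before
 * this commit starts reusing log space we refresh the on-disk superblock so
 * its record of the log tail is current.
 */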
322 jbd_debug(3, "superblock not updated\n");
325 J_ASSERT(journal->j_running_transaction != NULL);
326 J_ASSERT(journal->j_committing_transaction == NULL);
328 commit_transaction = journal->j_running_transaction;
329 J_ASSERT(commit_transaction->t_state == T_RUNNING);
331 trace_jbd_start_commit(journal, commit_transaction);
332 jbd_debug(1, "JBD: starting commit of transaction %d\n",
333 commit_transaction->t_tid);
335 spin_lock(&journal->j_state_lock);
336 commit_transaction->t_state = T_LOCKED;
338 trace_jbd_commit_locking(journal, commit_transaction);
339 spin_lock(&commit_transaction->t_handle_lock);
340 while (commit_transaction->t_updates) {
343 prepare_to_wait(&journal->j_wait_updates, &wait,
344 TASK_UNINTERRUPTIBLE);
345 if (commit_transaction->t_updates) {
346 spin_unlock(&commit_transaction->t_handle_lock);
347 spin_unlock(&journal->j_state_lock);
349 spin_lock(&journal->j_state_lock);
350 spin_lock(&commit_transaction->t_handle_lock);
352 finish_wait(&journal->j_wait_updates, &wait);
354 spin_unlock(&commit_transaction->t_handle_lock);
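/*
 * Once t_updates has drained to zero, no handle holds this transaction open
 * any more, and new handles block on j_wait_transaction_locked. From here on
 * only the commit code and the j_list_lock-protected paths (e.g.
 * journal_unmap_buffer()) touch this transaction's buffer lists.
 */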
356 J_ASSERT (commit_transaction->t_outstanding_credits <=
357 journal->j_max_transaction_buffers);
360 * First thing we are allowed to do is to discard any remaining
361 * BJ_Reserved buffers. Note, it is _not_ permissible to assume
362 * that there are no such buffers: if a large filesystem
363 * operation like a truncate needs to split itself over multiple
364 * transactions, then it may try to do a journal_restart() while
365 * there are still BJ_Reserved buffers outstanding. These must
366 * be released cleanly from the current transaction.
368 * In this case, the filesystem must still reserve write access
369 * again before modifying the buffer in the new transaction, but
370 * we do not require it to remember exactly which old buffers it
371 * has reserved. This is consistent with the existing behaviour
372 * that multiple journal_get_write_access() calls to the same
373 * buffer are perfectly permissible.
375 while (commit_transaction->t_reserved_list) {
376 jh = commit_transaction->t_reserved_list;
377 JBUFFER_TRACE(jh, "reserved, unused: refile");
379 * A journal_get_undo_access()+journal_release_buffer() may
380 * leave undo-committed data.
382 if (jh->b_committed_data) {
383 struct buffer_head *bh = jh2bh(jh);
385 jbd_lock_bh_state(bh);
386 jbd_free(jh->b_committed_data, bh->b_size);
387 jh->b_committed_data = NULL;
388 jbd_unlock_bh_state(bh);
390 journal_refile_buffer(journal, jh);
394 * Now try to drop any written-back buffers from the journal's
* checkpoint lists. We do this *before* commit because it potentially frees some memory.
398 spin_lock(&journal->j_list_lock);
399 __journal_clean_checkpoint_list(journal);
400 spin_unlock(&journal->j_list_lock);
402 jbd_debug (3, "JBD: commit phase 1\n");
* Clear the revoked flag to reflect that there are no revoked buffers
* in the next transaction, which is about to be started.
408 journal_clear_buffer_revoked_flags(journal);
411 * Switch to a new revoke table.
413 journal_switch_revoke_table(journal);
415 trace_jbd_commit_flushing(journal, commit_transaction);
416 commit_transaction->t_state = T_FLUSH;
417 journal->j_committing_transaction = commit_transaction;
418 journal->j_running_transaction = NULL;
419 start_time = ktime_get();
420 commit_transaction->t_log_start = journal->j_head;
421 wake_up(&journal->j_wait_transaction_locked);
422 spin_unlock(&journal->j_state_lock);
424 jbd_debug (3, "JBD: commit phase 2\n");
426 if (tid_geq(journal->j_commit_waited, commit_transaction->t_tid))
427 write_op = WRITE_SYNC;
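/*
 * j_commit_waited tracks (roughly) the highest tid some task has asked to
 * wait for; if that covers this transaction, issue the commit IO as
 * WRITE_SYNC so the waiting task is not queued behind background writeback.
 */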
430 * Now start flushing things to disk, in the order they appear
431 * on the transaction lists. Data blocks go first.
433 blk_start_plug(&plug);
err = journal_submit_data_buffers(journal, commit_transaction, write_op);
436 blk_finish_plug(&plug);
439 * Wait for all previously submitted IO to complete.
441 spin_lock(&journal->j_list_lock);
442 while (commit_transaction->t_locked_list) {
443 struct buffer_head *bh;
445 jh = commit_transaction->t_locked_list->b_tprev;
448 if (buffer_locked(bh)) {
449 spin_unlock(&journal->j_list_lock);
451 spin_lock(&journal->j_list_lock);
453 if (unlikely(!buffer_uptodate(bh))) {
454 if (!trylock_page(bh->b_page)) {
455 spin_unlock(&journal->j_list_lock);
456 lock_page(bh->b_page);
457 spin_lock(&journal->j_list_lock);
459 if (bh->b_page->mapping)
460 set_bit(AS_EIO, &bh->b_page->mapping->flags);
462 unlock_page(bh->b_page);
463 SetPageError(bh->b_page);
466 if (!inverted_lock(journal, bh)) {
468 spin_lock(&journal->j_list_lock);
471 if (buffer_jbd(bh) && bh2jh(bh) == jh &&
472 jh->b_transaction == commit_transaction &&
473 jh->b_jlist == BJ_Locked)
474 __journal_unfile_buffer(jh);
475 jbd_unlock_bh_state(bh);
476 release_data_buffer(bh);
477 cond_resched_lock(&journal->j_list_lock);
479 spin_unlock(&journal->j_list_lock);
482 char b[BDEVNAME_SIZE];
printk(KERN_WARNING
"JBD: Detected IO errors while flushing file data "
"on %s\n", bdevname(journal->j_fs_dev, b));
487 if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
488 journal_abort(journal, err);
492 blk_start_plug(&plug);
494 journal_write_revoke_records(journal, commit_transaction, write_op);
497 * If we found any dirty or locked buffers, then we should have
498 * looped back up to the write_out_data label. If there weren't
499 * any then journal_clean_data_list should have wiped the list
500 * clean by now, so check that it is in fact empty.
502 J_ASSERT (commit_transaction->t_sync_datalist == NULL);
504 jbd_debug (3, "JBD: commit phase 3\n");
507 * Way to go: we have now written out all of the data for a
508 * transaction! Now comes the tricky part: we need to write out
509 * metadata. Loop over the transaction's entire buffer list:
511 spin_lock(&journal->j_state_lock);
512 commit_transaction->t_state = T_COMMIT;
513 spin_unlock(&journal->j_state_lock);
515 trace_jbd_commit_logging(journal, commit_transaction);
516 J_ASSERT(commit_transaction->t_nr_buffers <=
517 commit_transaction->t_outstanding_credits);
521 while (commit_transaction->t_buffers) {
523 /* Find the next buffer to be journaled... */
525 jh = commit_transaction->t_buffers;
/* If we're in abort mode, we just un-journal the buffer and release it. */
530 if (is_journal_aborted(journal)) {
531 clear_buffer_jbddirty(jh2bh(jh));
532 JBUFFER_TRACE(jh, "journal is aborting: refile");
533 journal_refile_buffer(journal, jh);
534 /* If that was the last one, we need to clean up
535 * any descriptor buffers which may have been
* already allocated, even if we are now aborting. */
538 if (!commit_transaction->t_buffers)
539 goto start_journal_io;
543 /* Make sure we have a descriptor block in which to
544 record the metadata buffer. */
547 struct buffer_head *bh;
549 J_ASSERT (bufs == 0);
551 jbd_debug(4, "JBD: get descriptor\n");
553 descriptor = journal_get_descriptor_buffer(journal);
555 journal_abort(journal, -EIO);
559 bh = jh2bh(descriptor);
560 jbd_debug(4, "JBD: got buffer %llu (%p)\n",
561 (unsigned long long)bh->b_blocknr, bh->b_data);
562 header = (journal_header_t *)&bh->b_data[0];
563 header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
564 header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
565 header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
567 tagp = &bh->b_data[sizeof(journal_header_t)];
568 space_left = bh->b_size - sizeof(journal_header_t);
570 set_buffer_jwrite(bh);
571 set_buffer_dirty(bh);
/* Record it so that we can wait for IO completion later */
576 BUFFER_TRACE(bh, "ph3: file as descriptor");
journal_file_buffer(descriptor, commit_transaction, BJ_LogCtl);
581 /* Where is the buffer to be written? */
583 err = journal_next_log_block(journal, &blocknr);
584 /* If the block mapping failed, just abandon the buffer
585 and repeat this loop: we'll fall into the
586 refile-on-abort condition above. */
588 journal_abort(journal, err);
593 * start_this_handle() uses t_outstanding_credits to determine
594 * the free space in the log, but this counter is changed
595 * by journal_next_log_block() also.
597 commit_transaction->t_outstanding_credits--;
599 /* Bump b_count to prevent truncate from stumbling over
600 the shadowed buffer! @@@ This can go if we ever get
601 rid of the BJ_IO/BJ_Shadow pairing of buffers. */
604 /* Make a temporary IO buffer with which to write it out
605 (this will requeue both the metadata buffer and the
606 temporary IO buffer). new_bh goes on BJ_IO*/
608 set_buffer_jwrite(jh2bh(jh));
610 * akpm: journal_write_metadata_buffer() sets
611 * new_bh->b_transaction to commit_transaction.
612 * We need to clean this up before we release new_bh
613 * (which is of type BJ_IO)
615 JBUFFER_TRACE(jh, "ph3: write metadata");
616 flags = journal_write_metadata_buffer(commit_transaction,
617 jh, &new_jh, blocknr);
618 set_buffer_jwrite(jh2bh(new_jh));
619 wbuf[bufs++] = jh2bh(new_jh);
/* Record the new block's tag in the current descriptor buffer */
626 tag_flag |= JFS_FLAG_ESCAPE;
628 tag_flag |= JFS_FLAG_SAME_UUID;
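/*
 * JFS_FLAG_ESCAPE marks blocks whose first word happened to match
 * JFS_MAGIC_NUMBER and was zeroed in the journal copy by
 * journal_write_metadata_buffer(); recovery restores the magic when such a
 * block is replayed.
 */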
630 tag = (journal_block_tag_t *) tagp;
631 tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
632 tag->t_flags = cpu_to_be32(tag_flag);
633 tagp += sizeof(journal_block_tag_t);
634 space_left -= sizeof(journal_block_tag_t);
637 memcpy (tagp, journal->j_uuid, 16);
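/*
 * Only the first tag in each descriptor block is followed by the 16-byte
 * journal UUID; every later tag carries JFS_FLAG_SAME_UUID instead, and the
 * space_left check below conservatively reserves room for a tag plus those
 * 16 bytes.
 */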
/* If there's no more to do, or if the descriptor is full, let the IO rip! */
646 if (bufs == journal->j_wbufsize ||
647 commit_transaction->t_buffers == NULL ||
648 space_left < sizeof(journal_block_tag_t) + 16) {
650 jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
652 /* Write an end-of-descriptor marker before
653 submitting the IOs. "tag" still points to
654 the last tag we set up. */
656 tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG);
659 for (i = 0; i < bufs; i++) {
660 struct buffer_head *bh = wbuf[i];
662 clear_buffer_dirty(bh);
663 set_buffer_uptodate(bh);
664 bh->b_end_io = journal_end_buffer_io_sync;
665 submit_bh(write_op, bh);
669 /* Force a new descriptor to be generated next
670 time round the loop. */
676 blk_finish_plug(&plug);
678 /* Lo and behold: we have just managed to send a transaction to
679 the log. Before we can commit it, wait for the IO so far to
680 complete. Control buffers being written are on the
681 transaction's t_log_list queue, and metadata buffers are on
682 the t_iobuf_list queue.
684 Wait for the buffers in reverse order. That way we are
685 less likely to be woken up until all IOs have completed, and
686 so we incur less scheduling load.
689 jbd_debug(3, "JBD: commit phase 4\n");
692 * akpm: these are BJ_IO, and j_list_lock is not needed.
693 * See __journal_try_to_free_buffer.
696 while (commit_transaction->t_iobuf_list != NULL) {
697 struct buffer_head *bh;
699 jh = commit_transaction->t_iobuf_list->b_tprev;
701 if (buffer_locked(bh)) {
708 if (unlikely(!buffer_uptodate(bh)))
711 clear_buffer_jwrite(bh);
713 JBUFFER_TRACE(jh, "ph4: unfile after journal write");
714 journal_unfile_buffer(journal, jh);
717 * ->t_iobuf_list should contain only dummy buffer_heads
718 * which were created by journal_write_metadata_buffer().
720 BUFFER_TRACE(bh, "dumping temporary bh");
721 journal_put_journal_head(jh);
723 J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
724 free_buffer_head(bh);
/* We also have to unlock and free the corresponding shadowed buffer */
728 jh = commit_transaction->t_shadow_list->b_tprev;
730 clear_buffer_jwrite(bh);
731 J_ASSERT_BH(bh, buffer_jbddirty(bh));
733 /* The metadata is now released for reuse, but we need
734 to remember it against this transaction so that when
we finally commit, we can do any checkpointing required. */
737 JBUFFER_TRACE(jh, "file as BJ_Forget");
738 journal_file_buffer(jh, commit_transaction, BJ_Forget);
740 * Wake up any transactions which were waiting for this
741 * IO to complete. The barrier must be here so that changes
742 * by journal_file_buffer() take effect before wake_up_bit()
743 * does the waitqueue check.
746 wake_up_bit(&bh->b_state, BH_Unshadow);
747 JBUFFER_TRACE(jh, "brelse shadowed buffer");
751 J_ASSERT (commit_transaction->t_shadow_list == NULL);
753 jbd_debug(3, "JBD: commit phase 5\n");
755 /* Here we wait for the revoke record and descriptor record buffers */
757 while (commit_transaction->t_log_list != NULL) {
758 struct buffer_head *bh;
760 jh = commit_transaction->t_log_list->b_tprev;
762 if (buffer_locked(bh)) {
764 goto wait_for_ctlbuf;
767 goto wait_for_ctlbuf;
769 if (unlikely(!buffer_uptodate(bh)))
772 BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
773 clear_buffer_jwrite(bh);
774 journal_unfile_buffer(journal, jh);
775 journal_put_journal_head(jh);
776 __brelse(bh); /* One for getblk */
777 /* AKPM: bforget here */
781 journal_abort(journal, err);
783 jbd_debug(3, "JBD: commit phase 6\n");
785 /* All metadata is written, now write commit record and do cleanup */
786 spin_lock(&journal->j_state_lock);
787 J_ASSERT(commit_transaction->t_state == T_COMMIT);
788 commit_transaction->t_state = T_COMMIT_RECORD;
789 spin_unlock(&journal->j_state_lock);
791 if (journal_write_commit_record(journal, commit_transaction))
795 journal_abort(journal, err);
797 /* End of a transaction! Finally, we can do checkpoint
798 processing: any buffers committed as a result of this
transaction can be removed from any checkpoint list they were previously on. */
802 jbd_debug(3, "JBD: commit phase 7\n");
804 J_ASSERT(commit_transaction->t_sync_datalist == NULL);
805 J_ASSERT(commit_transaction->t_buffers == NULL);
806 J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
807 J_ASSERT(commit_transaction->t_iobuf_list == NULL);
808 J_ASSERT(commit_transaction->t_shadow_list == NULL);
809 J_ASSERT(commit_transaction->t_log_list == NULL);
813 * As there are other places (journal_unmap_buffer()) adding buffers
814 * to this list we have to be careful and hold the j_list_lock.
816 spin_lock(&journal->j_list_lock);
817 while (commit_transaction->t_forget) {
818 transaction_t *cp_transaction;
819 struct buffer_head *bh;
822 jh = commit_transaction->t_forget;
823 spin_unlock(&journal->j_list_lock);
* Get a reference so that bh cannot be freed before we are done with it.
830 jbd_lock_bh_state(bh);
831 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
832 jh->b_transaction == journal->j_running_transaction);
835 * If there is undo-protected committed data against
836 * this buffer, then we can remove it now. If it is a
837 * buffer needing such protection, the old frozen_data
838 * field now points to a committed version of the
* buffer, so rotate that field to the new committed data.
842 * Otherwise, we can just throw away the frozen data now.
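/*
 * Recap of the two copies involved: b_frozen_data is the snapshot made when
 * a newer transaction modified this buffer while it was still part of the
 * committing transaction (that frozen copy is what went to the journal),
 * while b_committed_data is the undo copy kept for buffers claimed with
 * journal_get_undo_access(), such as bitmap blocks.
 */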
844 if (jh->b_committed_data) {
845 jbd_free(jh->b_committed_data, bh->b_size);
846 jh->b_committed_data = NULL;
847 if (jh->b_frozen_data) {
848 jh->b_committed_data = jh->b_frozen_data;
849 jh->b_frozen_data = NULL;
851 } else if (jh->b_frozen_data) {
852 jbd_free(jh->b_frozen_data, bh->b_size);
853 jh->b_frozen_data = NULL;
856 spin_lock(&journal->j_list_lock);
857 cp_transaction = jh->b_cp_transaction;
858 if (cp_transaction) {
859 JBUFFER_TRACE(jh, "remove from old cp transaction");
860 __journal_remove_checkpoint(jh);
863 /* Only re-checkpoint the buffer_head if it is marked
864 * dirty. If the buffer was added to the BJ_Forget list
865 * by journal_forget, it may no longer be dirty and
* there's no point in keeping a checkpoint record for it. */
869 /* A buffer which has been freed while still being
870 * journaled by a previous transaction may end up still
871 * being dirty here, but we want to avoid writing back
872 * that buffer in the future after the "add to orphan"
* operation has been committed. That's not only a performance
874 * gain, it also stops aliasing problems if the buffer is
875 * left behind for writeback and gets reallocated for another
876 * use in a different page. */
877 if (buffer_freed(bh) && !jh->b_next_transaction) {
878 clear_buffer_freed(bh);
879 clear_buffer_jbddirty(bh);
882 if (buffer_jbddirty(bh)) {
883 JBUFFER_TRACE(jh, "add to new checkpointing trans");
884 __journal_insert_checkpoint(jh, commit_transaction);
885 if (is_journal_aborted(journal))
886 clear_buffer_jbddirty(bh);
888 J_ASSERT_BH(bh, !buffer_dirty(bh));
* A buffer on the BJ_Forget list that is not jbddirty means
891 * it has been freed by this transaction and hence it
892 * could not have been reallocated until this
893 * transaction has committed. *BUT* it could be
894 * reallocated once we have written all the data to
* disk and before we process the buffer on the BJ_Forget list.
898 if (!jh->b_next_transaction)
901 JBUFFER_TRACE(jh, "refile or unfile freed buffer");
902 __journal_refile_buffer(jh);
903 jbd_unlock_bh_state(bh);
905 release_buffer_page(bh);
908 cond_resched_lock(&journal->j_list_lock);
910 spin_unlock(&journal->j_list_lock);
912 * This is a bit sleazy. We use j_list_lock to protect transition
913 * of a transaction into T_FINISHED state and calling
914 * __journal_drop_transaction(). Otherwise we could race with
915 * other checkpointing code processing the transaction...
917 spin_lock(&journal->j_state_lock);
918 spin_lock(&journal->j_list_lock);
920 * Now recheck if some buffers did not get attached to the transaction
921 * while the lock was dropped...
923 if (commit_transaction->t_forget) {
924 spin_unlock(&journal->j_list_lock);
925 spin_unlock(&journal->j_state_lock);
929 /* Done with this transaction! */
931 jbd_debug(3, "JBD: commit phase 8\n");
933 J_ASSERT(commit_transaction->t_state == T_COMMIT_RECORD);
935 commit_transaction->t_state = T_FINISHED;
936 J_ASSERT(commit_transaction == journal->j_committing_transaction);
937 journal->j_commit_sequence = commit_transaction->t_tid;
938 journal->j_committing_transaction = NULL;
939 commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
942 * weight the commit time higher than the average time so we don't
943 * react too strongly to vast changes in commit time
945 if (likely(journal->j_average_commit_time))
946 journal->j_average_commit_time = (commit_time*3 +
947 journal->j_average_commit_time) / 4;
949 journal->j_average_commit_time = commit_time;
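/*
 * Worked example of the 3:1 weighting above: with a previous average of
 * 20 ms, a single 100 ms commit moves the average to
 * (3 * 100 + 20) / 4 = 80 ms, so the estimate tracks recent commits quickly
 * without jumping all the way to the latest sample.
 */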
951 spin_unlock(&journal->j_state_lock);
953 if (commit_transaction->t_checkpoint_list == NULL &&
954 commit_transaction->t_checkpoint_io_list == NULL) {
955 __journal_drop_transaction(journal, commit_transaction);
957 if (journal->j_checkpoint_transactions == NULL) {
958 journal->j_checkpoint_transactions = commit_transaction;
959 commit_transaction->t_cpnext = commit_transaction;
960 commit_transaction->t_cpprev = commit_transaction;
962 commit_transaction->t_cpnext =
963 journal->j_checkpoint_transactions;
964 commit_transaction->t_cpprev =
965 commit_transaction->t_cpnext->t_cpprev;
commit_transaction->t_cpnext->t_cpprev = commit_transaction;
commit_transaction->t_cpprev->t_cpnext = commit_transaction;
972 spin_unlock(&journal->j_list_lock);
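/*
 * The transaction now sits on the journal's circular list of checkpoint
 * transactions (j_checkpoint_transactions); it will eventually be dropped by
 * __journal_drop_transaction() once its checkpoint lists empty out.
 */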
974 trace_jbd_end_commit(journal, commit_transaction);
975 jbd_debug(1, "JBD: commit %d complete, head %d\n",
976 journal->j_commit_sequence, journal->j_tail_sequence);
978 wake_up(&journal->j_wait_done_commit);