Branch data Line data Source code
1 : : /*
2 : : * linux/fs/jbd/commit.c
3 : : *
4 : : * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
5 : : *
6 : : * Copyright 1998 Red Hat corp --- All Rights Reserved
7 : : *
8 : : * This file is part of the Linux kernel and is made available under
9 : : * the terms of the GNU General Public License, version 2, or at your
10 : : * option, any later version, incorporated herein by reference.
11 : : *
12 : : * Journal commit routines for the generic filesystem journaling code;
13 : : * part of the ext2fs journaling system.
14 : : */
15 : :
16 : : #include <linux/time.h>
17 : : #include <linux/fs.h>
18 : : #include <linux/jbd.h>
19 : : #include <linux/errno.h>
20 : : #include <linux/mm.h>
21 : : #include <linux/pagemap.h>
22 : : #include <linux/bio.h>
23 : : #include <linux/blkdev.h>
24 : : #include <trace/events/jbd.h>
25 : :
/*
 * Default IO completion handler for the temporary BJ_IO buffer_heads:
 * mirror the IO result into the buffer's uptodate bit, then unlock it.
 */
static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	BUFFER_TRACE(bh, "");

	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);

	unlock_buffer(bh);
}
38 : :
39 : : /*
40 : : * When an ext3-ordered file is truncated, it is possible that many pages are
41 : : * not successfully freed, because they are attached to a committing transaction.
42 : : * After the transaction commits, these pages are left on the LRU, with no
43 : : * ->mapping, and with attached buffers. These pages are trivially reclaimable
44 : : * by the VM, but their apparent absence upsets the VM accounting, and it makes
45 : : * the numbers in /proc/meminfo look odd.
46 : : *
47 : : * So here, we have a buffer which has just come off the forget list. Look to
48 : : * see if we can strip all buffers from the backing page.
49 : : *
50 : : * Called under journal->j_list_lock. The caller provided us with a ref
51 : : * against the buffer, and we drop that here.
52 : : */
53 : 0 : static void release_buffer_page(struct buffer_head *bh)
54 : : {
55 : : struct page *page;
56 : :
57 [ # # ]: 0 : if (buffer_dirty(bh))
58 : : goto nope;
59 [ # # ]: 0 : if (atomic_read(&bh->b_count) != 1)
60 : : goto nope;
61 : 0 : page = bh->b_page;
62 [ # # ]: 0 : if (!page)
63 : : goto nope;
64 [ # # ]: 0 : if (page->mapping)
65 : : goto nope;
66 : :
67 : : /* OK, it's a truncated page */
68 [ # # ]: 0 : if (!trylock_page(page))
69 : : goto nope;
70 : :
71 : : page_cache_get(page);
72 : 0 : __brelse(bh);
73 : 0 : try_to_free_buffers(page);
74 : 0 : unlock_page(page);
75 : 0 : page_cache_release(page);
76 : 0 : return;
77 : :
78 : : nope:
79 : 0 : __brelse(bh);
80 : : }
81 : :
82 : : /*
83 : : * Decrement reference counter for data buffer. If it has been marked
84 : : * 'BH_Freed', release it and the page to which it belongs if possible.
85 : : */
86 : 0 : static void release_data_buffer(struct buffer_head *bh)
87 : : {
88 [ # # ]: 0 : if (buffer_freed(bh)) {
89 [ # # ][ # # ]: 0 : WARN_ON_ONCE(buffer_dirty(bh));
[ # # ]
90 : : clear_buffer_freed(bh);
91 : : clear_buffer_mapped(bh);
92 : : clear_buffer_new(bh);
93 : : clear_buffer_req(bh);
94 : 0 : bh->b_bdev = NULL;
95 : 0 : release_buffer_page(bh);
96 : : } else
97 : : put_bh(bh);
98 : 0 : }
99 : :
100 : : /*
101 : : * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
102 : : * held. For ranking reasons we must trylock. If we lose, schedule away and
103 : : * return 0. j_list_lock is dropped in this case.
104 : : */
105 : 0 : static int inverted_lock(journal_t *journal, struct buffer_head *bh)
106 : : {
107 [ # # ]: 0 : if (!jbd_trylock_bh_state(bh)) {
108 : : spin_unlock(&journal->j_list_lock);
109 : 0 : schedule();
110 : 0 : return 0;
111 : : }
112 : : return 1;
113 : : }
114 : :
115 : : /* Done it all: now write the commit record. We should have
116 : : * cleaned up our previous buffers by now, so if we are in abort
117 : : * mode we can now just skip the rest of the journal write
118 : : * entirely.
119 : : *
120 : : * Returns 1 if the journal needs to be aborted or 0 on success
121 : : */
122 : 0 : static int journal_write_commit_record(journal_t *journal,
123 : : transaction_t *commit_transaction)
124 : : {
125 : 0 : struct journal_head *descriptor;
126 : : struct buffer_head *bh;
127 : : journal_header_t *header;
128 : : int ret;
129 : :
130 [ # # ]: 0 : if (is_journal_aborted(journal))
131 : : return 0;
132 : :
133 : 0 : descriptor = journal_get_descriptor_buffer(journal);
134 [ # # ]: 0 : if (!descriptor)
135 : : return 1;
136 : :
137 : : bh = jh2bh(descriptor);
138 : :
139 : 0 : header = (journal_header_t *)(bh->b_data);
140 : 0 : header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
141 : 0 : header->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
142 [ # # ]: 0 : header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
143 : :
144 : : JBUFFER_TRACE(descriptor, "write commit block");
145 : : set_buffer_dirty(bh);
146 : :
147 [ # # ]: 0 : if (journal->j_flags & JFS_BARRIER)
148 : 0 : ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_FLUSH_FUA);
149 : : else
150 : 0 : ret = sync_dirty_buffer(bh);
151 : :
152 : : put_bh(bh); /* One for getblk() */
153 : 0 : journal_put_journal_head(descriptor);
154 : :
155 : 0 : return (ret == -EIO);
156 : : }
157 : :
158 : 0 : static void journal_do_submit_data(struct buffer_head **wbuf, int bufs,
159 : : int write_op)
160 : : {
161 : : int i;
162 : :
163 [ # # ]: 0 : for (i = 0; i < bufs; i++) {
164 : 0 : wbuf[i]->b_end_io = end_buffer_write_sync;
165 : : /*
166 : : * Here we write back pagecache data that may be mmaped. Since
167 : : * we cannot afford to clean the page and set PageWriteback
168 : : * here due to lock ordering (page lock ranks above transaction
169 : : * start), the data can change while IO is in flight. Tell the
170 : : * block layer it should bounce the bio pages if stable data
171 : : * during write is required.
172 : : *
173 : : * We use up our safety reference in submit_bh().
174 : : */
175 : 0 : _submit_bh(write_op, wbuf[i], 1 << BIO_SNAP_STABLE);
176 : : }
177 : 0 : }
178 : :
179 : : /*
180 : : * Submit all the data buffers to disk
181 : : */
182 : 0 : static int journal_submit_data_buffers(journal_t *journal,
183 : : transaction_t *commit_transaction,
184 : : int write_op)
185 : : {
186 : 0 : struct journal_head *jh;
187 : 0 : struct buffer_head *bh;
188 : : int locked;
189 : : int bufs = 0;
190 : 0 : struct buffer_head **wbuf = journal->j_wbuf;
191 : : int err = 0;
192 : :
193 : : /*
194 : : * Whenever we unlock the journal and sleep, things can get added
195 : : * onto ->t_sync_datalist, so we have to keep looping back to
196 : : * write_out_data until we *know* that the list is empty.
197 : : *
198 : : * Cleanup any flushed data buffers from the data list. Even in
199 : : * abort mode, we want to flush this out as soon as possible.
200 : : */
201 : : write_out_data:
202 : 0 : cond_resched();
203 : : spin_lock(&journal->j_list_lock);
204 : :
205 [ # # ]: 0 : while (commit_transaction->t_sync_datalist) {
206 : : jh = commit_transaction->t_sync_datalist;
207 : : bh = jh2bh(jh);
208 : : locked = 0;
209 : :
210 : : /* Get reference just to make sure buffer does not disappear
211 : : * when we are forced to drop various locks */
212 : : get_bh(bh);
213 : : /* If the buffer is dirty, we need to submit IO and hence
214 : : * we need the buffer lock. We try to lock the buffer without
215 : : * blocking. If we fail, we need to drop j_list_lock and do
216 : : * blocking lock_buffer().
217 : : */
218 [ # # ]: 0 : if (buffer_dirty(bh)) {
219 [ # # ]: 0 : if (!trylock_buffer(bh)) {
220 : : BUFFER_TRACE(bh, "needs blocking lock");
221 : : spin_unlock(&journal->j_list_lock);
222 : : trace_jbd_do_submit_data(journal,
223 : : commit_transaction);
224 : : /* Write out all data to prevent deadlocks */
225 : 0 : journal_do_submit_data(wbuf, bufs, write_op);
226 : : bufs = 0;
227 : : lock_buffer(bh);
228 : : spin_lock(&journal->j_list_lock);
229 : : }
230 : : locked = 1;
231 : : }
232 : : /* We have to get bh_state lock. Again out of order, sigh. */
233 [ # # ]: 0 : if (!inverted_lock(journal, bh)) {
234 : : jbd_lock_bh_state(bh);
235 : : spin_lock(&journal->j_list_lock);
236 : : }
237 : : /* Someone already cleaned up the buffer? */
238 [ # # ][ # # ]: 0 : if (!buffer_jbd(bh) || bh2jh(bh) != jh
239 [ # # ]: 0 : || jh->b_transaction != commit_transaction
240 [ # # ]: 0 : || jh->b_jlist != BJ_SyncData) {
241 : : jbd_unlock_bh_state(bh);
242 [ # # ]: 0 : if (locked)
243 : 0 : unlock_buffer(bh);
244 : : BUFFER_TRACE(bh, "already cleaned up");
245 : 0 : release_data_buffer(bh);
246 : 0 : continue;
247 : : }
248 [ # # # # ]: 0 : if (locked && test_clear_buffer_dirty(bh)) {
249 : : BUFFER_TRACE(bh, "needs writeout, adding to array");
250 : 0 : wbuf[bufs++] = bh;
251 : 0 : __journal_file_buffer(jh, commit_transaction,
252 : : BJ_Locked);
253 : : jbd_unlock_bh_state(bh);
254 [ # # ]: 0 : if (bufs == journal->j_wbufsize) {
255 : : spin_unlock(&journal->j_list_lock);
256 : : trace_jbd_do_submit_data(journal,
257 : : commit_transaction);
258 : 0 : journal_do_submit_data(wbuf, bufs, write_op);
259 : : bufs = 0;
260 : 0 : goto write_out_data;
261 : : }
262 [ # # ][ # # ]: 0 : } else if (!locked && buffer_locked(bh)) {
263 : 0 : __journal_file_buffer(jh, commit_transaction,
264 : : BJ_Locked);
265 : : jbd_unlock_bh_state(bh);
266 : : put_bh(bh);
267 : : } else {
268 : : BUFFER_TRACE(bh, "writeout complete: unfile");
269 [ # # ]: 0 : if (unlikely(!buffer_uptodate(bh)))
270 : : err = -EIO;
271 : 0 : __journal_unfile_buffer(jh);
272 : : jbd_unlock_bh_state(bh);
273 [ # # ]: 0 : if (locked)
274 : 0 : unlock_buffer(bh);
275 : 0 : release_data_buffer(bh);
276 : : }
277 : :
278 [ # # ]: 0 : if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
279 : : spin_unlock(&journal->j_list_lock);
280 : : goto write_out_data;
281 : : }
282 : : }
283 : : spin_unlock(&journal->j_list_lock);
284 : : trace_jbd_do_submit_data(journal, commit_transaction);
285 : 0 : journal_do_submit_data(wbuf, bufs, write_op);
286 : :
287 : 0 : return err;
288 : : }
289 : :
290 : : /*
291 : : * journal_commit_transaction
292 : : *
293 : : * The primary function for committing a transaction to the log. This
294 : : * function is called by the journal thread to begin a complete commit.
295 : : */
296 : 0 : void journal_commit_transaction(journal_t *journal)
297 : : {
298 : : transaction_t *commit_transaction;
299 : 0 : struct journal_head *jh, *new_jh, *descriptor;
300 : 0 : struct buffer_head **wbuf = journal->j_wbuf;
301 : : int bufs;
302 : : int flags;
303 : : int err;
304 : : unsigned int blocknr;
305 : : ktime_t start_time;
306 : : u64 commit_time;
307 : : char *tagp = NULL;
308 : : journal_header_t *header;
309 : : journal_block_tag_t *tag = NULL;
310 : : int space_left = 0;
311 : : int first_tag = 0;
312 : : int tag_flag;
313 : : int i;
314 : : struct blk_plug plug;
315 : : int write_op = WRITE;
316 : :
317 : : /*
318 : : * First job: lock down the current transaction and wait for
319 : : * all outstanding updates to complete.
320 : : */
321 : :
322 : : /* Do we need to erase the effects of a prior journal_flush? */
323 [ # # ]: 0 : if (journal->j_flags & JFS_FLUSHED) {
324 : : jbd_debug(3, "super block updated\n");
325 : 0 : mutex_lock(&journal->j_checkpoint_mutex);
326 : : /*
327 : : * We hold j_checkpoint_mutex so tail cannot change under us.
328 : : * We don't need any special data guarantees for writing sb
329 : : * since journal is empty and it is ok for write to be
330 : : * flushed only with transaction commit.
331 : : */
332 : 0 : journal_update_sb_log_tail(journal, journal->j_tail_sequence,
333 : : journal->j_tail, WRITE_SYNC);
334 : 0 : mutex_unlock(&journal->j_checkpoint_mutex);
335 : : } else {
336 : : jbd_debug(3, "superblock not updated\n");
337 : : }
338 : :
339 [ # # ]: 0 : J_ASSERT(journal->j_running_transaction != NULL);
340 [ # # ]: 0 : J_ASSERT(journal->j_committing_transaction == NULL);
341 : :
342 : : commit_transaction = journal->j_running_transaction;
343 : :
344 : : trace_jbd_start_commit(journal, commit_transaction);
345 : : jbd_debug(1, "JBD: starting commit of transaction %d\n",
346 : : commit_transaction->t_tid);
347 : :
348 : : spin_lock(&journal->j_state_lock);
349 [ # # ]: 0 : J_ASSERT(commit_transaction->t_state == T_RUNNING);
350 : 0 : commit_transaction->t_state = T_LOCKED;
351 : :
352 : : trace_jbd_commit_locking(journal, commit_transaction);
353 : : spin_lock(&commit_transaction->t_handle_lock);
354 [ # # ]: 0 : while (commit_transaction->t_updates) {
355 : 0 : DEFINE_WAIT(wait);
356 : :
357 : 0 : prepare_to_wait(&journal->j_wait_updates, &wait,
358 : : TASK_UNINTERRUPTIBLE);
359 [ # # ]: 0 : if (commit_transaction->t_updates) {
360 : : spin_unlock(&commit_transaction->t_handle_lock);
361 : : spin_unlock(&journal->j_state_lock);
362 : 0 : schedule();
363 : : spin_lock(&journal->j_state_lock);
364 : : spin_lock(&commit_transaction->t_handle_lock);
365 : : }
366 : 0 : finish_wait(&journal->j_wait_updates, &wait);
367 : : }
368 : : spin_unlock(&commit_transaction->t_handle_lock);
369 : :
370 [ # # ]: 0 : J_ASSERT (commit_transaction->t_outstanding_credits <=
371 : : journal->j_max_transaction_buffers);
372 : :
373 : : /*
374 : : * First thing we are allowed to do is to discard any remaining
375 : : * BJ_Reserved buffers. Note, it is _not_ permissible to assume
376 : : * that there are no such buffers: if a large filesystem
377 : : * operation like a truncate needs to split itself over multiple
378 : : * transactions, then it may try to do a journal_restart() while
379 : : * there are still BJ_Reserved buffers outstanding. These must
380 : : * be released cleanly from the current transaction.
381 : : *
382 : : * In this case, the filesystem must still reserve write access
383 : : * again before modifying the buffer in the new transaction, but
384 : : * we do not require it to remember exactly which old buffers it
385 : : * has reserved. This is consistent with the existing behaviour
386 : : * that multiple journal_get_write_access() calls to the same
387 : : * buffer are perfectly permissible.
388 : : */
389 [ # # ]: 0 : while (commit_transaction->t_reserved_list) {
390 : : jh = commit_transaction->t_reserved_list;
391 : : JBUFFER_TRACE(jh, "reserved, unused: refile");
392 : : /*
393 : : * A journal_get_undo_access()+journal_release_buffer() may
394 : : * leave undo-committed data.
395 : : */
396 [ # # ]: 0 : if (jh->b_committed_data) {
397 : : struct buffer_head *bh = jh2bh(jh);
398 : :
399 : : jbd_lock_bh_state(bh);
400 : 0 : jbd_free(jh->b_committed_data, bh->b_size);
401 : 0 : jh->b_committed_data = NULL;
402 : : jbd_unlock_bh_state(bh);
403 : : }
404 : 0 : journal_refile_buffer(journal, jh);
405 : : }
406 : :
407 : : /*
408 : : * Now try to drop any written-back buffers from the journal's
409 : : * checkpoint lists. We do this *before* commit because it potentially
410 : : * frees some memory
411 : : */
412 : : spin_lock(&journal->j_list_lock);
413 : 0 : __journal_clean_checkpoint_list(journal);
414 : : spin_unlock(&journal->j_list_lock);
415 : :
416 : : jbd_debug (3, "JBD: commit phase 1\n");
417 : :
418 : : /*
419 : : * Clear revoked flag to reflect there are no revoked buffers
420 : : * in the next transaction which is going to be started.
421 : : */
422 : 0 : journal_clear_buffer_revoked_flags(journal);
423 : :
424 : : /*
425 : : * Switch to a new revoke table.
426 : : */
427 : 0 : journal_switch_revoke_table(journal);
428 : :
429 : : trace_jbd_commit_flushing(journal, commit_transaction);
430 : 0 : commit_transaction->t_state = T_FLUSH;
431 : 0 : journal->j_committing_transaction = commit_transaction;
432 : 0 : journal->j_running_transaction = NULL;
433 : 0 : start_time = ktime_get();
434 : 0 : commit_transaction->t_log_start = journal->j_head;
435 : 0 : wake_up(&journal->j_wait_transaction_locked);
436 : : spin_unlock(&journal->j_state_lock);
437 : :
438 : : jbd_debug (3, "JBD: commit phase 2\n");
439 : :
440 [ # # ]: 0 : if (tid_geq(journal->j_commit_waited, commit_transaction->t_tid))
441 : : write_op = WRITE_SYNC;
442 : :
443 : : /*
444 : : * Now start flushing things to disk, in the order they appear
445 : : * on the transaction lists. Data blocks go first.
446 : : */
447 : 0 : blk_start_plug(&plug);
448 : 0 : err = journal_submit_data_buffers(journal, commit_transaction,
449 : : write_op);
450 : 0 : blk_finish_plug(&plug);
451 : :
452 : : /*
453 : : * Wait for all previously submitted IO to complete.
454 : : */
455 : : spin_lock(&journal->j_list_lock);
456 [ # # ]: 0 : while (commit_transaction->t_locked_list) {
457 : 0 : struct buffer_head *bh;
458 : :
459 : 0 : jh = commit_transaction->t_locked_list->b_tprev;
460 : : bh = jh2bh(jh);
461 : : get_bh(bh);
462 [ # # ]: 0 : if (buffer_locked(bh)) {
463 : : spin_unlock(&journal->j_list_lock);
464 : : wait_on_buffer(bh);
465 : : spin_lock(&journal->j_list_lock);
466 : : }
467 [ # # ]: 0 : if (unlikely(!buffer_uptodate(bh))) {
468 [ # # ]: 0 : if (!trylock_page(bh->b_page)) {
469 : : spin_unlock(&journal->j_list_lock);
470 : 0 : lock_page(bh->b_page);
471 : : spin_lock(&journal->j_list_lock);
472 : : }
473 [ # # ]: 0 : if (bh->b_page->mapping)
474 : 0 : set_bit(AS_EIO, &bh->b_page->mapping->flags);
475 : :
476 : 0 : unlock_page(bh->b_page);
477 : 0 : SetPageError(bh->b_page);
478 : : err = -EIO;
479 : : }
480 [ # # ]: 0 : if (!inverted_lock(journal, bh)) {
481 : : put_bh(bh);
482 : : spin_lock(&journal->j_list_lock);
483 : 0 : continue;
484 : : }
485 [ # # ][ # # ]: 0 : if (buffer_jbd(bh) && bh2jh(bh) == jh &&
[ # # ]
486 [ # # ]: 0 : jh->b_transaction == commit_transaction &&
487 : 0 : jh->b_jlist == BJ_Locked)
488 : 0 : __journal_unfile_buffer(jh);
489 : : jbd_unlock_bh_state(bh);
490 : 0 : release_data_buffer(bh);
491 : 0 : cond_resched_lock(&journal->j_list_lock);
492 : : }
493 : : spin_unlock(&journal->j_list_lock);
494 : :
495 [ # # ]: 0 : if (err) {
496 : : char b[BDEVNAME_SIZE];
497 : :
498 : 0 : printk(KERN_WARNING
499 : : "JBD: Detected IO errors while flushing file data "
500 : : "on %s\n", bdevname(journal->j_fs_dev, b));
501 [ # # ]: 0 : if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
502 : 0 : journal_abort(journal, err);
503 : : err = 0;
504 : : }
505 : :
506 : 0 : blk_start_plug(&plug);
507 : :
508 : 0 : journal_write_revoke_records(journal, commit_transaction, write_op);
509 : :
510 : : /*
511 : : * If we found any dirty or locked buffers, then we should have
512 : : * looped back up to the write_out_data label. If there weren't
513 : : * any then journal_clean_data_list should have wiped the list
514 : : * clean by now, so check that it is in fact empty.
515 : : */
516 [ # # ]: 0 : J_ASSERT (commit_transaction->t_sync_datalist == NULL);
517 : :
518 : : jbd_debug (3, "JBD: commit phase 3\n");
519 : :
520 : : /*
521 : : * Way to go: we have now written out all of the data for a
522 : : * transaction! Now comes the tricky part: we need to write out
523 : : * metadata. Loop over the transaction's entire buffer list:
524 : : */
525 : : spin_lock(&journal->j_state_lock);
526 : 0 : commit_transaction->t_state = T_COMMIT;
527 : : spin_unlock(&journal->j_state_lock);
528 : :
529 : : trace_jbd_commit_logging(journal, commit_transaction);
530 [ # # ]: 0 : J_ASSERT(commit_transaction->t_nr_buffers <=
531 : : commit_transaction->t_outstanding_credits);
532 : :
533 : : descriptor = NULL;
534 : : bufs = 0;
535 [ # # ]: 0 : while (commit_transaction->t_buffers) {
536 : :
537 : : /* Find the next buffer to be journaled... */
538 : :
539 : : jh = commit_transaction->t_buffers;
540 : :
541 : : /* If we're in abort mode, we just un-journal the buffer and
542 : : release it. */
543 : :
544 [ # # ]: 0 : if (is_journal_aborted(journal)) {
545 : : clear_buffer_jbddirty(jh2bh(jh));
546 : : JBUFFER_TRACE(jh, "journal is aborting: refile");
547 : 0 : journal_refile_buffer(journal, jh);
548 : : /* If that was the last one, we need to clean up
549 : : * any descriptor buffers which may have been
550 : : * already allocated, even if we are now
551 : : * aborting. */
552 [ # # ]: 0 : if (!commit_transaction->t_buffers)
553 : : goto start_journal_io;
554 : 0 : continue;
555 : : }
556 : :
557 : : /* Make sure we have a descriptor block in which to
558 : : record the metadata buffer. */
559 : :
560 [ # # ]: 0 : if (!descriptor) {
561 : : struct buffer_head *bh;
562 : :
563 [ # # ]: 0 : J_ASSERT (bufs == 0);
564 : :
565 : : jbd_debug(4, "JBD: get descriptor\n");
566 : :
567 : 0 : descriptor = journal_get_descriptor_buffer(journal);
568 [ # # ]: 0 : if (!descriptor) {
569 : 0 : journal_abort(journal, -EIO);
570 : 0 : continue;
571 : : }
572 : :
573 : : bh = jh2bh(descriptor);
574 : : jbd_debug(4, "JBD: got buffer %llu (%p)\n",
575 : : (unsigned long long)bh->b_blocknr, bh->b_data);
576 : 0 : header = (journal_header_t *)&bh->b_data[0];
577 : 0 : header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
578 : 0 : header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
579 [ # # ]: 0 : header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
580 : :
581 : 0 : tagp = &bh->b_data[sizeof(journal_header_t)];
582 : 0 : space_left = bh->b_size - sizeof(journal_header_t);
583 : : first_tag = 1;
584 : : set_buffer_jwrite(bh);
585 : : set_buffer_dirty(bh);
586 : 0 : wbuf[bufs++] = bh;
587 : :
588 : : /* Record it so that we can wait for IO
589 : : completion later */
590 : : BUFFER_TRACE(bh, "ph3: file as descriptor");
591 : 0 : journal_file_buffer(descriptor, commit_transaction,
592 : : BJ_LogCtl);
593 : : }
594 : :
595 : : /* Where is the buffer to be written? */
596 : :
597 : 0 : err = journal_next_log_block(journal, &blocknr);
598 : : /* If the block mapping failed, just abandon the buffer
599 : : and repeat this loop: we'll fall into the
600 : : refile-on-abort condition above. */
601 [ # # ]: 0 : if (err) {
602 : 0 : journal_abort(journal, err);
603 : 0 : continue;
604 : : }
605 : :
606 : : /*
607 : : * start_this_handle() uses t_outstanding_credits to determine
608 : : * the free space in the log, but this counter is changed
609 : : * by journal_next_log_block() also.
610 : : */
611 : 0 : commit_transaction->t_outstanding_credits--;
612 : :
613 : : /* Bump b_count to prevent truncate from stumbling over
614 : : the shadowed buffer! @@@ This can go if we ever get
615 : : rid of the BJ_IO/BJ_Shadow pairing of buffers. */
616 : : get_bh(jh2bh(jh));
617 : :
618 : : /* Make a temporary IO buffer with which to write it out
619 : : (this will requeue both the metadata buffer and the
620 : : temporary IO buffer). new_bh goes on BJ_IO*/
621 : :
622 : : set_buffer_jwrite(jh2bh(jh));
623 : : /*
624 : : * akpm: journal_write_metadata_buffer() sets
625 : : * new_bh->b_transaction to commit_transaction.
626 : : * We need to clean this up before we release new_bh
627 : : * (which is of type BJ_IO)
628 : : */
629 : : JBUFFER_TRACE(jh, "ph3: write metadata");
630 : 0 : flags = journal_write_metadata_buffer(commit_transaction,
631 : : jh, &new_jh, blocknr);
632 : 0 : set_buffer_jwrite(jh2bh(new_jh));
633 : 0 : wbuf[bufs++] = jh2bh(new_jh);
634 : :
635 : : /* Record the new block's tag in the current descriptor
636 : : buffer */
637 : :
638 : : tag_flag = 0;
639 [ # # ]: 0 : if (flags & 1)
640 : : tag_flag |= JFS_FLAG_ESCAPE;
641 [ # # ]: 0 : if (!first_tag)
642 : 0 : tag_flag |= JFS_FLAG_SAME_UUID;
643 : :
644 : : tag = (journal_block_tag_t *) tagp;
645 : 0 : tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
646 [ # # ]: 0 : tag->t_flags = cpu_to_be32(tag_flag);
647 : 0 : tagp += sizeof(journal_block_tag_t);
648 : 0 : space_left -= sizeof(journal_block_tag_t);
649 : :
650 [ # # ]: 0 : if (first_tag) {
651 : 0 : memcpy (tagp, journal->j_uuid, 16);
652 : 0 : tagp += 16;
653 : 0 : space_left -= 16;
654 : : first_tag = 0;
655 : : }
656 : :
657 : : /* If there's no more to do, or if the descriptor is full,
658 : : let the IO rip! */
659 : :
660 [ # # ][ # # ]: 0 : if (bufs == journal->j_wbufsize ||
661 [ # # ]: 0 : commit_transaction->t_buffers == NULL ||
662 : 0 : space_left < sizeof(journal_block_tag_t) + 16) {
663 : :
664 : : jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
665 : :
666 : : /* Write an end-of-descriptor marker before
667 : : submitting the IOs. "tag" still points to
668 : : the last tag we set up. */
669 : :
670 : 0 : tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG);
671 : :
672 : : start_journal_io:
673 [ # # ]: 0 : for (i = 0; i < bufs; i++) {
674 : 0 : struct buffer_head *bh = wbuf[i];
675 : : lock_buffer(bh);
676 : : clear_buffer_dirty(bh);
677 : : set_buffer_uptodate(bh);
678 : 0 : bh->b_end_io = journal_end_buffer_io_sync;
679 : : /*
680 : : * In data=journal mode, here we can end up
681 : : * writing pagecache data that might be
682 : : * mmapped. Since we can't afford to clean the
683 : : * page and set PageWriteback (see the comment
684 : : * near the other use of _submit_bh()), the
685 : : * data can change while the write is in
686 : : * flight. Tell the block layer to bounce the
687 : : * bio pages if stable pages are required.
688 : : */
689 : 0 : _submit_bh(write_op, bh, 1 << BIO_SNAP_STABLE);
690 : : }
691 : 0 : cond_resched();
692 : :
693 : : /* Force a new descriptor to be generated next
694 : : time round the loop. */
695 : : descriptor = NULL;
696 : : bufs = 0;
697 : : }
698 : : }
699 : :
700 : 0 : blk_finish_plug(&plug);
701 : :
702 : : /* Lo and behold: we have just managed to send a transaction to
703 : : the log. Before we can commit it, wait for the IO so far to
704 : : complete. Control buffers being written are on the
705 : : transaction's t_log_list queue, and metadata buffers are on
706 : : the t_iobuf_list queue.
707 : :
708 : : Wait for the buffers in reverse order. That way we are
709 : : less likely to be woken up until all IOs have completed, and
710 : : so we incur less scheduling load.
711 : : */
712 : :
713 : : jbd_debug(3, "JBD: commit phase 4\n");
714 : :
715 : : /*
716 : : * akpm: these are BJ_IO, and j_list_lock is not needed.
717 : : * See __journal_try_to_free_buffer.
718 : : */
719 : : wait_for_iobuf:
720 [ # # ]: 0 : while (commit_transaction->t_iobuf_list != NULL) {
721 : : struct buffer_head *bh;
722 : :
723 : 0 : jh = commit_transaction->t_iobuf_list->b_tprev;
724 : : bh = jh2bh(jh);
725 [ # # ]: 0 : if (buffer_locked(bh)) {
726 : : wait_on_buffer(bh);
727 : : goto wait_for_iobuf;
728 : : }
729 [ # # ]: 0 : if (cond_resched())
730 : : goto wait_for_iobuf;
731 : :
732 [ # # ]: 0 : if (unlikely(!buffer_uptodate(bh)))
733 : : err = -EIO;
734 : :
735 : : clear_buffer_jwrite(bh);
736 : :
737 : : JBUFFER_TRACE(jh, "ph4: unfile after journal write");
738 : 0 : journal_unfile_buffer(journal, jh);
739 : :
740 : : /*
741 : : * ->t_iobuf_list should contain only dummy buffer_heads
742 : : * which were created by journal_write_metadata_buffer().
743 : : */
744 : : BUFFER_TRACE(bh, "dumping temporary bh");
745 : 0 : journal_put_journal_head(jh);
746 : 0 : __brelse(bh);
747 [ # # ]: 0 : J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
748 : 0 : free_buffer_head(bh);
749 : :
750 : : /* We also have to unlock and free the corresponding
751 : : shadowed buffer */
752 : 0 : jh = commit_transaction->t_shadow_list->b_tprev;
753 : : bh = jh2bh(jh);
754 : : clear_buffer_jwrite(bh);
755 [ # # ]: 0 : J_ASSERT_BH(bh, buffer_jbddirty(bh));
756 : :
757 : : /* The metadata is now released for reuse, but we need
758 : : to remember it against this transaction so that when
759 : : we finally commit, we can do any checkpointing
760 : : required. */
761 : : JBUFFER_TRACE(jh, "file as BJ_Forget");
762 : 0 : journal_file_buffer(jh, commit_transaction, BJ_Forget);
763 : : /*
764 : : * Wake up any transactions which were waiting for this
765 : : * IO to complete. The barrier must be here so that changes
766 : : * by journal_file_buffer() take effect before wake_up_bit()
767 : : * does the waitqueue check.
768 : : */
769 : 0 : smp_mb();
770 : 0 : wake_up_bit(&bh->b_state, BH_Unshadow);
771 : : JBUFFER_TRACE(jh, "brelse shadowed buffer");
772 : 0 : __brelse(bh);
773 : : }
774 : :
775 [ # # ]: 0 : J_ASSERT (commit_transaction->t_shadow_list == NULL);
776 : :
777 : : jbd_debug(3, "JBD: commit phase 5\n");
778 : :
779 : : /* Here we wait for the revoke record and descriptor record buffers */
780 : : wait_for_ctlbuf:
781 [ # # ]: 0 : while (commit_transaction->t_log_list != NULL) {
782 : : struct buffer_head *bh;
783 : :
784 : 0 : jh = commit_transaction->t_log_list->b_tprev;
785 : : bh = jh2bh(jh);
786 [ # # ]: 0 : if (buffer_locked(bh)) {
787 : : wait_on_buffer(bh);
788 : : goto wait_for_ctlbuf;
789 : : }
790 [ # # ]: 0 : if (cond_resched())
791 : : goto wait_for_ctlbuf;
792 : :
793 [ # # ]: 0 : if (unlikely(!buffer_uptodate(bh)))
794 : : err = -EIO;
795 : :
796 : : BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
797 : : clear_buffer_jwrite(bh);
798 : 0 : journal_unfile_buffer(journal, jh);
799 : 0 : journal_put_journal_head(jh);
800 : 0 : __brelse(bh); /* One for getblk */
801 : : /* AKPM: bforget here */
802 : : }
803 : :
804 [ # # ]: 0 : if (err)
805 : 0 : journal_abort(journal, err);
806 : :
807 : : jbd_debug(3, "JBD: commit phase 6\n");
808 : :
809 : : /* All metadata is written, now write commit record and do cleanup */
810 : : spin_lock(&journal->j_state_lock);
811 [ # # ]: 0 : J_ASSERT(commit_transaction->t_state == T_COMMIT);
812 : 0 : commit_transaction->t_state = T_COMMIT_RECORD;
813 : : spin_unlock(&journal->j_state_lock);
814 : :
815 [ # # ]: 0 : if (journal_write_commit_record(journal, commit_transaction))
816 : : err = -EIO;
817 : :
818 [ # # ]: 0 : if (err)
819 : 0 : journal_abort(journal, err);
820 : :
821 : : /* End of a transaction! Finally, we can do checkpoint
822 : : processing: any buffers committed as a result of this
823 : : transaction can be removed from any checkpoint list it was on
824 : : before. */
825 : :
826 : : jbd_debug(3, "JBD: commit phase 7\n");
827 : :
828 [ # # ]: 0 : J_ASSERT(commit_transaction->t_sync_datalist == NULL);
829 [ # # ]: 0 : J_ASSERT(commit_transaction->t_buffers == NULL);
830 [ # # ]: 0 : J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
831 [ # # ]: 0 : J_ASSERT(commit_transaction->t_iobuf_list == NULL);
832 [ # # ]: 0 : J_ASSERT(commit_transaction->t_shadow_list == NULL);
833 [ # # ]: 0 : J_ASSERT(commit_transaction->t_log_list == NULL);
834 : :
835 : : restart_loop:
836 : : /*
837 : : * As there are other places (journal_unmap_buffer()) adding buffers
838 : : * to this list we have to be careful and hold the j_list_lock.
839 : : */
840 : : spin_lock(&journal->j_list_lock);
841 [ # # ]: 0 : while (commit_transaction->t_forget) {
842 : : transaction_t *cp_transaction;
843 : : struct buffer_head *bh;
844 : : int try_to_free = 0;
845 : :
846 : : jh = commit_transaction->t_forget;
847 : : spin_unlock(&journal->j_list_lock);
848 : : bh = jh2bh(jh);
849 : : /*
850 : : * Get a reference so that bh cannot be freed before we are
851 : : * done with it.
852 : : */
853 : : get_bh(bh);
854 : : jbd_lock_bh_state(bh);
855 [ # # ][ # # ]: 0 : J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
856 : : jh->b_transaction == journal->j_running_transaction);
857 : :
858 : : /*
859 : : * If there is undo-protected committed data against
860 : : * this buffer, then we can remove it now. If it is a
861 : : * buffer needing such protection, the old frozen_data
862 : : * field now points to a committed version of the
863 : : * buffer, so rotate that field to the new committed
864 : : * data.
865 : : *
866 : : * Otherwise, we can just throw away the frozen data now.
867 : : */
868 [ # # ]: 0 : if (jh->b_committed_data) {
869 : 0 : jbd_free(jh->b_committed_data, bh->b_size);
870 : 0 : jh->b_committed_data = NULL;
871 [ # # ]: 0 : if (jh->b_frozen_data) {
872 : 0 : jh->b_committed_data = jh->b_frozen_data;
873 : 0 : jh->b_frozen_data = NULL;
874 : : }
875 [ # # ]: 0 : } else if (jh->b_frozen_data) {
876 : 0 : jbd_free(jh->b_frozen_data, bh->b_size);
877 : 0 : jh->b_frozen_data = NULL;
878 : : }
879 : :
880 : : spin_lock(&journal->j_list_lock);
881 : 0 : cp_transaction = jh->b_cp_transaction;
882 [ # # ]: 0 : if (cp_transaction) {
883 : : JBUFFER_TRACE(jh, "remove from old cp transaction");
884 : 0 : __journal_remove_checkpoint(jh);
885 : : }
886 : :
887 : : /* Only re-checkpoint the buffer_head if it is marked
888 : : * dirty. If the buffer was added to the BJ_Forget list
889 : : * by journal_forget, it may no longer be dirty and
890 : : * there's no point in keeping a checkpoint record for
891 : : * it. */
892 : :
893 : : /*
894 : : * A buffer which has been freed while still being journaled by
895 : : * a previous transaction.
896 : : */
897 [ # # ]: 0 : if (buffer_freed(bh)) {
898 : : /*
899 : : * If the running transaction is the one containing
900 : : * "add to orphan" operation (b_next_transaction !=
901 : : * NULL), we have to wait for that transaction to
902 : : * commit before we can really get rid of the buffer.
903 : : * So just clear b_modified to not confuse transaction
904 : : * credit accounting and refile the buffer to
905 : : * BJ_Forget of the running transaction. If the just
906 : : * committed transaction contains "add to orphan"
907 : : * operation, we can completely invalidate the buffer
908 : : * now. We are rather thorough in that since the
909 : : * buffer may be still accessible when blocksize <
910 : : * pagesize and it is attached to the last partial
911 : : * page.
912 : : */
913 : 0 : jh->b_modified = 0;
914 [ # # ]: 0 : if (!jh->b_next_transaction) {
915 : : clear_buffer_freed(bh);
916 : : clear_buffer_jbddirty(bh);
917 : : clear_buffer_mapped(bh);
918 : : clear_buffer_new(bh);
919 : : clear_buffer_req(bh);
920 : 0 : bh->b_bdev = NULL;
921 : : }
922 : : }
923 : :
924 [ # # ]: 0 : if (buffer_jbddirty(bh)) {
925 : : JBUFFER_TRACE(jh, "add to new checkpointing trans");
926 : 0 : __journal_insert_checkpoint(jh, commit_transaction);
927 [ # # ]: 0 : if (is_journal_aborted(journal))
928 : : clear_buffer_jbddirty(bh);
929 : : } else {
930 [ # # ]: 0 : J_ASSERT_BH(bh, !buffer_dirty(bh));
931 : : /*
932 : : * The buffer on BJ_Forget list and not jbddirty means
933 : : * it has been freed by this transaction and hence it
934 : : * could not have been reallocated until this
935 : : * transaction has committed. *BUT* it could be
936 : : * reallocated once we have written all the data to
937 : : * disk and before we process the buffer on BJ_Forget
938 : : * list.
939 : : */
940 [ # # ]: 0 : if (!jh->b_next_transaction)
941 : : try_to_free = 1;
942 : : }
943 : : JBUFFER_TRACE(jh, "refile or unfile freed buffer");
944 : 0 : __journal_refile_buffer(jh);
945 : : jbd_unlock_bh_state(bh);
946 [ # # ]: 0 : if (try_to_free)
947 : 0 : release_buffer_page(bh);
948 : : else
949 : 0 : __brelse(bh);
950 : 0 : cond_resched_lock(&journal->j_list_lock);
951 : : }
952 : : spin_unlock(&journal->j_list_lock);
953 : : /*
954 : : * This is a bit sleazy. We use j_list_lock to protect transition
955 : : * of a transaction into T_FINISHED state and calling
956 : : * __journal_drop_transaction(). Otherwise we could race with
957 : : * other checkpointing code processing the transaction...
958 : : */
959 : : spin_lock(&journal->j_state_lock);
960 : : spin_lock(&journal->j_list_lock);
961 : : /*
962 : : * Now recheck if some buffers did not get attached to the transaction
963 : : * while the lock was dropped...
964 : : */
965 [ # # ]: 0 : if (commit_transaction->t_forget) {
966 : : spin_unlock(&journal->j_list_lock);
967 : : spin_unlock(&journal->j_state_lock);
968 : : goto restart_loop;
969 : : }
970 : :
971 : : /* Done with this transaction! */
972 : :
973 : : jbd_debug(3, "JBD: commit phase 8\n");
974 : :
975 [ # # ]: 0 : J_ASSERT(commit_transaction->t_state == T_COMMIT_RECORD);
976 : :
977 : 0 : commit_transaction->t_state = T_FINISHED;
978 [ # # ]: 0 : J_ASSERT(commit_transaction == journal->j_committing_transaction);
979 : 0 : journal->j_commit_sequence = commit_transaction->t_tid;
980 : 0 : journal->j_committing_transaction = NULL;
981 : 0 : commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
982 : :
983 : : /*
984 : : * weight the commit time higher than the average time so we don't
985 : : * react too strongly to vast changes in commit time
986 : : */
987 [ # # ]: 0 : if (likely(journal->j_average_commit_time))
988 : 0 : journal->j_average_commit_time = (commit_time*3 +
989 : 0 : journal->j_average_commit_time) / 4;
990 : : else
991 : 0 : journal->j_average_commit_time = commit_time;
992 : :
993 : : spin_unlock(&journal->j_state_lock);
994 : :
995 [ # # ][ # # ]: 0 : if (commit_transaction->t_checkpoint_list == NULL &&
996 : 0 : commit_transaction->t_checkpoint_io_list == NULL) {
997 : 0 : __journal_drop_transaction(journal, commit_transaction);
998 : : } else {
999 [ # # ]: 0 : if (journal->j_checkpoint_transactions == NULL) {
1000 : 0 : journal->j_checkpoint_transactions = commit_transaction;
1001 : 0 : commit_transaction->t_cpnext = commit_transaction;
1002 : 0 : commit_transaction->t_cpprev = commit_transaction;
1003 : : } else {
1004 : 0 : commit_transaction->t_cpnext =
1005 : : journal->j_checkpoint_transactions;
1006 : 0 : commit_transaction->t_cpprev =
1007 : 0 : commit_transaction->t_cpnext->t_cpprev;
1008 : 0 : commit_transaction->t_cpnext->t_cpprev =
1009 : : commit_transaction;
1010 : 0 : commit_transaction->t_cpprev->t_cpnext =
1011 : : commit_transaction;
1012 : : }
1013 : : }
1014 : : spin_unlock(&journal->j_list_lock);
1015 : :
1016 : : trace_jbd_end_commit(journal, commit_transaction);
1017 : : jbd_debug(1, "JBD: commit %d complete, head %d\n",
1018 : : journal->j_commit_sequence, journal->j_tail_sequence);
1019 : :
1020 : 0 : wake_up(&journal->j_wait_done_commit);
1021 : 0 : }
|