Branch data Line data Source code
1 : : /*
2 : : * linux/fs/ext4/inode.c
3 : : *
4 : : * Copyright (C) 1992, 1993, 1994, 1995
5 : : * Remy Card (card@masi.ibp.fr)
6 : : * Laboratoire MASI - Institut Blaise Pascal
7 : : * Universite Pierre et Marie Curie (Paris VI)
8 : : *
9 : : * from
10 : : *
11 : : * linux/fs/minix/inode.c
12 : : *
13 : : * Copyright (C) 1991, 1992 Linus Torvalds
14 : : *
15 : : * 64-bit file support on 64-bit platforms by Jakub Jelinek
16 : : * (jj@sunsite.ms.mff.cuni.cz)
17 : : *
18 : : * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000
19 : : */
20 : :
21 : : #include <linux/fs.h>
22 : : #include <linux/time.h>
23 : : #include <linux/jbd2.h>
24 : : #include <linux/highuid.h>
25 : : #include <linux/pagemap.h>
26 : : #include <linux/quotaops.h>
27 : : #include <linux/string.h>
28 : : #include <linux/buffer_head.h>
29 : : #include <linux/writeback.h>
30 : : #include <linux/pagevec.h>
31 : : #include <linux/mpage.h>
32 : : #include <linux/namei.h>
33 : : #include <linux/uio.h>
34 : : #include <linux/bio.h>
35 : : #include <linux/workqueue.h>
36 : : #include <linux/kernel.h>
37 : : #include <linux/printk.h>
38 : : #include <linux/slab.h>
39 : : #include <linux/ratelimit.h>
40 : : #include <linux/aio.h>
41 : :
42 : : #include "ext4_jbd2.h"
43 : : #include "xattr.h"
44 : : #include "acl.h"
45 : : #include "truncate.h"
46 : :
47 : : #include <trace/events/ext4.h>
48 : :
49 : : #define MPAGE_DA_EXTENT_TAIL 0x01
50 : :
51 : 0 : static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
52 : : struct ext4_inode_info *ei)
53 : : {
54 : 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
55 : : __u16 csum_lo;
56 : : __u16 csum_hi = 0;
57 : : __u32 csum;
58 : :
59 : 0 : csum_lo = le16_to_cpu(raw->i_checksum_lo);
60 : 0 : raw->i_checksum_lo = 0;
61 [ # # ][ # # ]: 0 : if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
62 : 0 : EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
63 : 0 : csum_hi = le16_to_cpu(raw->i_checksum_hi);
64 : 0 : raw->i_checksum_hi = 0;
65 : : }
66 : :
67 : 0 : csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw,
68 : 0 : EXT4_INODE_SIZE(inode->i_sb));
69 : :
70 : 0 : raw->i_checksum_lo = cpu_to_le16(csum_lo);
71 [ # # ][ # # ]: 0 : if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
72 : 0 : EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
73 : 0 : raw->i_checksum_hi = cpu_to_le16(csum_hi);
74 : :
75 : 0 : return csum;
76 : : }
77 : :
78 : 0 : static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,
79 : : struct ext4_inode_info *ei)
80 : : {
81 : : __u32 provided, calculated;
82 : :
83 [ + - ]: 5535 : if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
84 [ - + ]: 5535 : cpu_to_le32(EXT4_OS_LINUX) ||
85 : 5535 : !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
86 : : EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
87 : : return 1;
88 : :
89 : 0 : provided = le16_to_cpu(raw->i_checksum_lo);
90 : 0 : calculated = ext4_inode_csum(inode, raw, ei);
91 [ # # ][ # # ]: 5535 : if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
92 : 0 : EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
93 : 0 : provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16;
94 : : else
95 : 0 : calculated &= 0xFFFF;
96 : :
97 : 0 : return provided == calculated;
98 : : }
99 : :
100 : 0 : static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
101 : : struct ext4_inode_info *ei)
102 : : {
103 : : __u32 csum;
104 : :
105 [ + + ]: 10850591 : if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
106 [ - + ]: 10850488 : cpu_to_le32(EXT4_OS_LINUX) ||
107 : 10850488 : !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
108 : : EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
109 : 0 : return;
110 : :
111 : 0 : csum = ext4_inode_csum(inode, raw, ei);
112 : 0 : raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF);
113 [ # # ][ # # ]: 0 : if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
114 : 0 : EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
115 : 0 : raw->i_checksum_hi = cpu_to_le16(csum >> 16);
116 : : }
117 : :
118 : : static inline int ext4_begin_ordered_truncate(struct inode *inode,
119 : : loff_t new_size)
120 : : {
121 : : trace_ext4_begin_ordered_truncate(inode, new_size);
122 : : /*
123 : : * If jinode is zero, then we never opened the file for
124 : : * writing, so there's no need to call
125 : : * jbd2_journal_begin_ordered_truncate() since there's no
126 : : * outstanding writes we need to flush.
127 : : */
128 [ + + ][ + + ]: 354817 : if (!EXT4_I(inode)->jinode)
129 : : return 0;
130 : 354707 : return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
131 : : EXT4_I(inode)->jinode,
132 : : new_size);
133 : : }
134 : :
135 : : static void ext4_invalidatepage(struct page *page, unsigned int offset,
136 : : unsigned int length);
137 : : static int __ext4_journalled_writepage(struct page *page, unsigned int len);
138 : : static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
139 : : static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
140 : : int pextents);
141 : :
142 : : /*
143 : : * Test whether an inode is a fast symlink.
144 : : */
145 : : static int ext4_inode_is_fast_symlink(struct inode *inode)
146 : : {
147 [ # # ][ - + ]: 105198 : int ea_blocks = EXT4_I(inode)->i_file_acl ?
[ - + ][ - + ]
148 : 0 : (inode->i_sb->s_blocksize >> 9) : 0;
149 : :
150 [ # # ][ # # ]: 7483 : return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
[ + - ][ - + ]
[ + - ][ - + ]
[ + - ][ + - ]
151 : : }
152 : :
153 : : /*
154 : : * Restart the transaction associated with *handle. This does a commit,
155 : : * so before we call here everything must be consistently dirtied against
156 : : * this transaction.
157 : : */
158 : 0 : int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
159 : : int nblocks)
160 : : {
161 : : int ret;
162 : :
163 : : /*
164 : : * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
165 : : * moment, get_block can be called only for blocks inside i_size since
166 : : * page cache has been already dropped and writes are blocked by
167 : : * i_mutex. So we can safely drop the i_data_sem here.
168 : : */
169 [ - + ]: 5 : BUG_ON(EXT4_JOURNAL(inode) == NULL);
170 : : jbd_debug(2, "restarting handle %p\n", handle);
171 : 5 : up_write(&EXT4_I(inode)->i_data_sem);
172 : : ret = ext4_journal_restart(handle, nblocks);
173 : 5 : down_write(&EXT4_I(inode)->i_data_sem);
174 : 5 : ext4_discard_preallocations(inode);
175 : :
176 : 5 : return ret;
177 : : }
178 : :
179 : : /*
180 : : * Called at the last iput() if i_nlink is zero.
181 : : */
182 : 0 : void ext4_evict_inode(struct inode *inode)
183 : : {
184 : : handle_t *handle;
185 : : int err;
186 : :
187 : : trace_ext4_evict_inode(inode);
188 : :
189 [ + + ]: 476193 : if (inode->i_nlink) {
190 : : /*
191 : : * When journalling data dirty buffers are tracked only in the
192 : : * journal. So although mm thinks everything is clean and
193 : : * ready for reaping the inode might still have some pages to
194 : : * write in the running transaction or waiting to be
195 : : * checkpointed. Thus calling jbd2_journal_invalidatepage()
196 : : * (via truncate_inode_pages()) to discard these buffers can
197 : : * cause data loss. Also even if we did not discard these
198 : : * buffers, we would have no way to find them after the inode
199 : : * is reaped and thus user could see stale data if he tries to
200 : : * read them before the transaction is checkpointed. So be
201 : : * careful and force everything to disk here... We use
202 : : * ei->i_datasync_tid to store the newest transaction
203 : : * containing inode's data.
204 : : *
205 : : * Note that directories do not have this problem because they
206 : : * don't use page cache.
207 : : */
208 [ + + ][ + + ]: 18146 : if (ext4_should_journal_data(inode) &&
209 [ + - ]: 341 : (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
210 : 341 : inode->i_ino != EXT4_JOURNAL_INO) {
211 : : journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
212 : 341 : tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
213 : :
214 : 341 : jbd2_complete_transaction(journal, commit_tid);
215 : 341 : filemap_write_and_wait(&inode->i_data);
216 : : }
217 : 18146 : truncate_inode_pages(&inode->i_data, 0);
218 : :
219 [ - + ]: 18146 : WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
220 : : goto no_delete;
221 : : }
222 : :
223 [ + - ]: 458047 : if (!is_bad_inode(inode))
224 : 458047 : dquot_initialize(inode);
225 : :
226 [ + + ]: 458047 : if (ext4_should_order_data(inode))
227 : : ext4_begin_ordered_truncate(inode, 0);
228 : 458047 : truncate_inode_pages(&inode->i_data, 0);
229 : :
230 [ - + ]: 458046 : WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
231 [ + - ]: 458046 : if (is_bad_inode(inode))
232 : : goto no_delete;
233 : :
234 : : /*
235 : : * Protect us against freezing - iput() caller didn't have to have any
236 : : * protection against it
237 : : */
238 : 458047 : sb_start_intwrite(inode->i_sb);
239 : 0 : handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
240 : : ext4_blocks_for_truncate(inode)+3);
241 [ - + ]: 458047 : if (IS_ERR(handle)) {
242 [ # # ]: 0 : ext4_std_error(inode->i_sb, PTR_ERR(handle));
243 : : /*
244 : : * If we're going to skip the normal cleanup, we still need to
245 : : * make sure that the in-core orphan linked list is properly
246 : : * cleaned up.
247 : : */
248 : 0 : ext4_orphan_del(NULL, inode);
249 : 0 : sb_end_intwrite(inode->i_sb);
250 : : goto no_delete;
251 : : }
252 : :
253 [ + - ][ - + ]: 458047 : if (IS_SYNC(inode))
254 : : ext4_handle_sync(handle);
255 : 458047 : inode->i_size = 0;
256 : 458047 : err = ext4_mark_inode_dirty(handle, inode);
257 [ - + ]: 458047 : if (err) {
258 : 0 : ext4_warning(inode->i_sb,
259 : : "couldn't mark inode dirty (err %d)", err);
260 : 0 : goto stop_handle;
261 : : }
262 [ + + ]: 458047 : if (inode->i_blocks)
263 : 58288 : ext4_truncate(inode);
264 : :
265 : : /*
266 : : * ext4_ext_truncate() doesn't reserve any slop when it
267 : : * restarts journal transactions; therefore there may not be
268 : : * enough credits left in the handle to remove the inode from
269 : : * the orphan list and set the dtime field.
270 : : */
271 [ - + ]: 458047 : if (!ext4_handle_has_enough_credits(handle, 3)) {
272 : : err = ext4_journal_extend(handle, 3);
273 [ # # ]: 0 : if (err > 0)
274 : : err = ext4_journal_restart(handle, 3);
275 [ # # ]: 0 : if (err != 0) {
276 : 0 : ext4_warning(inode->i_sb,
277 : : "couldn't extend journal (err %d)", err);
278 : : stop_handle:
279 : 0 : ext4_journal_stop(handle);
280 : 0 : ext4_orphan_del(NULL, inode);
281 : 0 : sb_end_intwrite(inode->i_sb);
282 : : goto no_delete;
283 : : }
284 : : }
285 : :
286 : : /*
287 : : * Kill off the orphan record which ext4_truncate created.
288 : : * AKPM: I think this can be inside the above `if'.
289 : : * Note that ext4_orphan_del() has to be able to cope with the
290 : : * deletion of a non-existent orphan - this is because we don't
291 : : * know if ext4_truncate() actually created an orphan record.
292 : : * (Well, we could do this if we need to, but heck - it works)
293 : : */
294 : 458047 : ext4_orphan_del(handle, inode);
295 : 458047 : EXT4_I(inode)->i_dtime = get_seconds();
296 : :
297 : : /*
298 : : * One subtle ordering requirement: if anything has gone wrong
299 : : * (transaction abort, IO errors, whatever), then we can still
300 : : * do these next steps (the fs will already have been marked as
301 : : * having errors), but we can't free the inode if the mark_dirty
302 : : * fails.
303 : : */
304 [ - + ]: 458047 : if (ext4_mark_inode_dirty(handle, inode))
305 : : /* If that failed, just do the required in-core inode clear. */
306 : 0 : ext4_clear_inode(inode);
307 : : else
308 : 458045 : ext4_free_inode(handle, inode);
309 : 458047 : ext4_journal_stop(handle);
310 : 458047 : sb_end_intwrite(inode->i_sb);
311 : 476192 : return;
312 : : no_delete:
313 : 18146 : ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
314 : : }
315 : :
316 : : #ifdef CONFIG_QUOTA
317 : 0 : qsize_t *ext4_get_reserved_space(struct inode *inode)
318 : : {
319 : 3104791 : return &EXT4_I(inode)->i_reserved_quota;
320 : : }
321 : : #endif
322 : :
323 : : /*
324 : : * Calculate the number of metadata blocks need to reserve
325 : : * to allocate a block located at @lblock
326 : : */
327 : 0 : static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
328 : : {
329 [ + - ]: 1828675 : if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
330 : 1828675 : return ext4_ext_calc_metadata_amount(inode, lblock);
331 : :
332 : 0 : return ext4_ind_calc_metadata_amount(inode, lblock);
333 : : }
334 : :
335 : : /*
336 : : * Called with i_data_sem down, which is important since we can call
337 : : * ext4_discard_preallocations() from here.
338 : : */
339 : 0 : void ext4_da_update_reserve_space(struct inode *inode,
340 : : int used, int quota_claim)
341 : : {
342 : 97697 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
343 : : struct ext4_inode_info *ei = EXT4_I(inode);
344 : :
345 : : spin_lock(&ei->i_block_reservation_lock);
346 : : trace_ext4_da_update_reserve_space(inode, used, quota_claim);
347 [ - + ]: 97720 : if (unlikely(used > ei->i_reserved_data_blocks)) {
348 : 0 : ext4_warning(inode->i_sb, "%s: ino %lu, used %d "
349 : : "with only %d reserved data blocks",
350 : : __func__, inode->i_ino, used,
351 : : ei->i_reserved_data_blocks);
352 : 0 : WARN_ON(1);
353 : 0 : used = ei->i_reserved_data_blocks;
354 : : }
355 : :
356 [ - + ]: 97720 : if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
357 : 0 : ext4_warning(inode->i_sb, "ino %lu, allocated %d "
358 : : "with only %d reserved metadata blocks "
359 : : "(releasing %d blocks with reserved %d data blocks)",
360 : : inode->i_ino, ei->i_allocated_meta_blocks,
361 : : ei->i_reserved_meta_blocks, used,
362 : : ei->i_reserved_data_blocks);
363 : 0 : WARN_ON(1);
364 : 0 : ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
365 : : }
366 : :
367 : : /* Update per-inode reservations */
368 : 97720 : ei->i_reserved_data_blocks -= used;
369 : 97720 : ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
370 : 195440 : percpu_counter_sub(&sbi->s_dirtyclusters_counter,
371 : 97720 : used + ei->i_allocated_meta_blocks);
372 : 97727 : ei->i_allocated_meta_blocks = 0;
373 : :
374 [ + + ]: 97727 : if (ei->i_reserved_data_blocks == 0) {
375 : : /*
376 : : * We can release all of the reserved metadata blocks
377 : : * only when we have written all of the delayed
378 : : * allocation blocks.
379 : : */
380 : 25118 : percpu_counter_sub(&sbi->s_dirtyclusters_counter,
381 : 25118 : ei->i_reserved_meta_blocks);
382 : 25119 : ei->i_reserved_meta_blocks = 0;
383 : 25119 : ei->i_da_metadata_calc_len = 0;
384 : : }
385 : : spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
386 : :
387 : : /* Update quota subsystem for data blocks */
388 [ + - ]: 97723 : if (quota_claim)
389 : 97723 : dquot_claim_block(inode, EXT4_C2B(sbi, used));
390 : : else {
391 : : /*
392 : : * We did fallocate with an offset that is already delayed
393 : : * allocated. So on delayed allocated writeback we should
394 : : * not re-claim the quota for fallocated blocks.
395 : : */
396 : 0 : dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
397 : : }
398 : :
399 : : /*
400 : : * If we have done all the pending block allocations and if
401 : : * there aren't any writers on the inode, we can discard the
402 : : * inode's preallocations.
403 : : */
404 [ + + ][ + + ]: 97726 : if ((ei->i_reserved_data_blocks == 0) &&
405 : 25119 : (atomic_read(&inode->i_writecount) == 0))
406 : 13540 : ext4_discard_preallocations(inode);
407 : 97726 : }
408 : :
409 : 0 : static int __check_block_validity(struct inode *inode, const char *func,
410 : : unsigned int line,
411 : : struct ext4_map_blocks *map)
412 : : {
413 [ - + ]: 2349540 : if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
414 : : map->m_len)) {
415 : 0 : ext4_error_inode(inode, func, line, map->m_pblk,
416 : : "lblock %lu mapped to illegal pblock "
417 : : "(length %d)", (unsigned long) map->m_lblk,
418 : : map->m_len);
419 : 0 : return -EIO;
420 : : }
421 : : return 0;
422 : : }
423 : :
424 : : #define check_block_validity(inode, map) \
425 : : __check_block_validity((inode), __func__, __LINE__, (map))
426 : :
427 : : #ifdef ES_AGGRESSIVE_TEST
428 : : static void ext4_map_blocks_es_recheck(handle_t *handle,
429 : : struct inode *inode,
430 : : struct ext4_map_blocks *es_map,
431 : : struct ext4_map_blocks *map,
432 : : int flags)
433 : : {
434 : : int retval;
435 : :
436 : : map->m_flags = 0;
437 : : /*
438 : : * There is a race window that the result is not the same.
439 : : * e.g. xfstests #223 when dioread_nolock enables. The reason
440 : : * is that we lookup a block mapping in extent status tree with
441 : : * out taking i_data_sem. So at the time the unwritten extent
442 : : * could be converted.
443 : : */
444 : : if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
445 : : down_read((&EXT4_I(inode)->i_data_sem));
446 : : if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
447 : : retval = ext4_ext_map_blocks(handle, inode, map, flags &
448 : : EXT4_GET_BLOCKS_KEEP_SIZE);
449 : : } else {
450 : : retval = ext4_ind_map_blocks(handle, inode, map, flags &
451 : : EXT4_GET_BLOCKS_KEEP_SIZE);
452 : : }
453 : : if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
454 : : up_read((&EXT4_I(inode)->i_data_sem));
455 : : /*
456 : : * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
457 : : * because it shouldn't be marked in es_map->m_flags.
458 : : */
459 : : map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);
460 : :
461 : : /*
462 : : * We don't check m_len because extent will be collpased in status
463 : : * tree. So the m_len might not equal.
464 : : */
465 : : if (es_map->m_lblk != map->m_lblk ||
466 : : es_map->m_flags != map->m_flags ||
467 : : es_map->m_pblk != map->m_pblk) {
468 : : printk("ES cache assertion failed for inode: %lu "
469 : : "es_cached ex [%d/%d/%llu/%x] != "
470 : : "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
471 : : inode->i_ino, es_map->m_lblk, es_map->m_len,
472 : : es_map->m_pblk, es_map->m_flags, map->m_lblk,
473 : : map->m_len, map->m_pblk, map->m_flags,
474 : : retval, flags);
475 : : }
476 : : }
477 : : #endif /* ES_AGGRESSIVE_TEST */
478 : :
479 : : /*
480 : : * The ext4_map_blocks() function tries to look up the requested blocks,
481 : : * and returns if the blocks are already mapped.
482 : : *
483 : : * Otherwise it takes the write lock of the i_data_sem and allocate blocks
484 : : * and store the allocated blocks in the result buffer head and mark it
485 : : * mapped.
486 : : *
487 : : * If file type is extents based, it will call ext4_ext_map_blocks(),
488 : : * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping
489 : : * based files
490 : : *
491 : : * On success, it returns the number of blocks being mapped or allocate.
492 : : * if create==0 and the blocks are pre-allocated and uninitialized block,
493 : : * the result buffer head is unmapped. If the create ==1, it will make sure
494 : : * the buffer head is mapped.
495 : : *
496 : : * It returns 0 if plain look up failed (blocks have not been allocated), in
497 : : * that case, buffer head is unmapped
498 : : *
499 : : * It returns the error in case of allocation failure.
500 : : */
501 : 0 : int ext4_map_blocks(handle_t *handle, struct inode *inode,
502 : : struct ext4_map_blocks *map, int flags)
503 : : {
504 : : struct extent_status es;
505 : : int retval;
506 : : #ifdef ES_AGGRESSIVE_TEST
507 : : struct ext4_map_blocks orig_map;
508 : :
509 : : memcpy(&orig_map, map, sizeof(*map));
510 : : #endif
511 : :
512 : 2546614 : map->m_flags = 0;
513 : : ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
514 : : "logical block %lu\n", inode->i_ino, flags, map->m_len,
515 : : (unsigned long) map->m_lblk);
516 : :
517 : : /* Lookup extent status tree firstly */
518 [ + + ]: 2546614 : if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
519 : 2323816 : ext4_es_lru_add(inode);
520 [ + + ][ + + ]: 2323706 : if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
521 : 4162390 : map->m_pblk = ext4_es_pblock(&es) +
522 : 4162390 : map->m_lblk - es.es_lblk;
523 [ + + ]: 2081195 : map->m_flags |= ext4_es_is_written(&es) ?
524 : : EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN;
525 : 2081195 : retval = es.es_len - (map->m_lblk - es.es_lblk);
526 [ + + ]: 2081195 : if (retval > map->m_len)
527 : 372337 : retval = map->m_len;
528 : 2081195 : map->m_len = retval;
529 [ + + ][ - + ]: 242511 : } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
530 : : retval = 0;
531 : : } else {
532 : 0 : BUG_ON(1);
533 : : }
534 : : #ifdef ES_AGGRESSIVE_TEST
535 : : ext4_map_blocks_es_recheck(handle, inode, map,
536 : : &orig_map, flags);
537 : : #endif
538 : : goto found;
539 : : }
540 : :
541 : : /*
542 : : * Try to see if we can get the block without requesting a new
543 : : * file system block.
544 : : */
545 [ + + ]: 222954 : if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
546 : 222944 : down_read((&EXT4_I(inode)->i_data_sem));
547 [ + - ]: 222927 : if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
548 : 222927 : retval = ext4_ext_map_blocks(handle, inode, map, flags &
549 : : EXT4_GET_BLOCKS_KEEP_SIZE);
550 : : } else {
551 : 0 : retval = ext4_ind_map_blocks(handle, inode, map, flags &
552 : : EXT4_GET_BLOCKS_KEEP_SIZE);
553 : : }
554 [ + + ]: 2769663 : if (retval > 0) {
555 : : int ret;
556 : : unsigned int status;
557 : :
558 [ - + ]: 51264 : if (unlikely(retval != map->m_len)) {
559 : 0 : ext4_warning(inode->i_sb,
560 : : "ES len assertion failed for inode "
561 : : "%lu: retval %d != map->m_len %d",
562 : : inode->i_ino, retval, map->m_len);
563 : 0 : WARN_ON(1);
564 : : }
565 : :
566 [ + - ]: 51264 : status = map->m_flags & EXT4_MAP_UNWRITTEN ?
567 : : EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
568 [ + - - + ]: 102528 : if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
569 : 51264 : ext4_find_delalloc_range(inode, map->m_lblk,
570 : 51264 : map->m_lblk + map->m_len - 1))
571 : 0 : status |= EXTENT_STATUS_DELAYED;
572 : 51264 : ret = ext4_es_insert_extent(inode, map->m_lblk,
573 : : map->m_len, map->m_pblk, status);
574 [ - + ]: 51264 : if (ret < 0)
575 : : retval = ret;
576 : : }
577 [ + + ]: 223049 : if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
578 : 222949 : up_read((&EXT4_I(inode)->i_data_sem));
579 : :
580 : : found:
581 [ + + ][ + + ]: 2546765 : if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
582 : 2132450 : int ret = check_block_validity(inode, map);
583 [ + - ]: 2132448 : if (ret != 0)
584 : : return ret;
585 : : }
586 : :
587 : : /* If it is only a block(s) look up */
588 [ + + ]: 2546763 : if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
589 : : return retval;
590 : :
591 : : /*
592 : : * Returns if the blocks have already allocated
593 : : *
594 : : * Note that if blocks have been preallocated
595 : : * ext4_ext_get_block() returns the create = 0
596 : : * with buffer head unmapped.
597 : : */
598 [ + + ][ + + ]: 361866 : if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
599 : : return retval;
600 : :
601 : : /*
602 : : * Here we clear m_flags because after allocating an new extent,
603 : : * it will be set again.
604 : : */
605 : 217094 : map->m_flags &= ~EXT4_MAP_FLAGS;
606 : :
607 : : /*
608 : : * New blocks allocate and/or writing to uninitialized extent
609 : : * will possibly result in updating i_data, so we take
610 : : * the write lock of i_data_sem, and call get_blocks()
611 : : * with create == 1 flag.
612 : : */
613 : 217094 : down_write((&EXT4_I(inode)->i_data_sem));
614 : :
615 : : /*
616 : : * if the caller is from delayed allocation writeout path
617 : : * we have already reserved fs blocks for allocation
618 : : * let the underlying get_block() function know to
619 : : * avoid double accounting
620 : : */
621 [ + + ]: 217099 : if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
622 : : ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
623 : : /*
624 : : * We need to check for EXT4 here because migrate
625 : : * could have changed the inode type in between
626 : : */
627 [ + - ]: 217078 : if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
628 : 217078 : retval = ext4_ext_map_blocks(handle, inode, map, flags);
629 : : } else {
630 : 0 : retval = ext4_ind_map_blocks(handle, inode, map, flags);
631 : :
632 [ # # ][ # # ]: 0 : if (retval > 0 && map->m_flags & EXT4_MAP_NEW) {
633 : : /*
634 : : * We allocated new blocks which will result in
635 : : * i_data's format changing. Force the migrate
636 : : * to fail by clearing migrate flags
637 : : */
638 : : ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
639 : : }
640 : :
641 : : /*
642 : : * Update reserved blocks/metadata blocks after successful
643 : : * block allocation which had been deferred till now. We don't
644 : : * support fallocate for non extent files. So we can update
645 : : * reserve space here.
646 : : */
647 [ # # ][ # # ]: 0 : if ((retval > 0) &&
648 : : (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
649 : 0 : ext4_da_update_reserve_space(inode, retval, 1);
650 : : }
651 [ + + ]: 217103 : if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
652 : : ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
653 : :
654 [ + - ]: 217097 : if (retval > 0) {
655 : : int ret;
656 : : unsigned int status;
657 : :
658 [ - + ]: 217097 : if (unlikely(retval != map->m_len)) {
659 : 0 : ext4_warning(inode->i_sb,
660 : : "ES len assertion failed for inode "
661 : : "%lu: retval %d != map->m_len %d",
662 : : inode->i_ino, retval, map->m_len);
663 : 0 : WARN_ON(1);
664 : : }
665 : :
666 : : /*
667 : : * If the extent has been zeroed out, we don't need to update
668 : : * extent status tree.
669 : : */
670 [ - + # # ]: 217074 : if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
671 : 0 : ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
672 [ # # ]: 0 : if (ext4_es_is_written(&es))
673 : : goto has_zeroout;
674 : : }
675 [ + ]: 217074 : status = map->m_flags & EXT4_MAP_UNWRITTEN ?
676 : : EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
677 [ + + + + ]: 336447 : if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
678 : 119373 : ext4_find_delalloc_range(inode, map->m_lblk,
679 : 119373 : map->m_lblk + map->m_len - 1))
680 : 5 : status |= EXTENT_STATUS_DELAYED;
681 : 217074 : ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
682 : : map->m_pblk, status);
683 [ - + ]: 217100 : if (ret < 0)
684 : : retval = ret;
685 : : }
686 : :
687 : : has_zeroout:
688 : 217100 : up_write((&EXT4_I(inode)->i_data_sem));
689 [ + ][ + + ]: 217077 : if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
690 : 217080 : int ret = check_block_validity(inode, map);
691 [ + ]: 217087 : if (ret != 0)
692 : : return ret;
693 : : }
694 : 217089 : return retval;
695 : : }
696 : :
697 : : /* Maximum number of blocks we map for direct IO at once. */
698 : : #define DIO_MAX_BLOCKS 4096
699 : :
700 : 0 : static int _ext4_get_block(struct inode *inode, sector_t iblock,
701 : : struct buffer_head *bh, int flags)
702 : : {
703 : : handle_t *handle = ext4_journal_current_handle();
704 : : struct ext4_map_blocks map;
705 : : int ret = 0, started = 0;
706 : : int dio_credits;
707 : :
708 [ + ]: 745175 : if (ext4_has_inline_data(inode))
709 : : return -ERANGE;
710 : :
711 : 745180 : map.m_lblk = iblock;
712 : 745180 : map.m_len = bh->b_size >> inode->i_blkbits;
713 : :
714 [ + + ][ + - ]: 745180 : if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) {
[ + + ]
715 : : /* Direct IO write... */
716 [ - + ]: 214625 : if (map.m_len > DIO_MAX_BLOCKS)
717 : 0 : map.m_len = DIO_MAX_BLOCKS;
718 : 214625 : dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
719 : : handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
720 : : dio_credits);
721 [ - + ]: 214625 : if (IS_ERR(handle)) {
722 : : ret = PTR_ERR(handle);
723 : 0 : return ret;
724 : : }
725 : : started = 1;
726 : : }
727 : :
728 : 745180 : ret = ext4_map_blocks(handle, inode, &map, flags);
729 [ + + ]: 745297 : if (ret > 0) {
730 : : ext4_io_end_t *io_end = ext4_inode_aio(inode);
731 : :
732 : 548089 : map_bh(bh, inode->i_sb, map.m_pblk);
733 : 548089 : bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
734 [ - + ][ # # ]: 548089 : if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
735 : : set_buffer_defer_completion(bh);
736 : 548089 : bh->b_size = inode->i_sb->s_blocksize * map.m_len;
737 : : ret = 0;
738 : : }
739 [ + ]: 745297 : if (started)
740 : 214625 : ext4_journal_stop(handle);
741 : 745291 : return ret;
742 : : }
743 : :
744 : 0 : int ext4_get_block(struct inode *inode, sector_t iblock,
745 : : struct buffer_head *bh, int create)
746 : : {
747 : 600524 : return _ext4_get_block(inode, iblock, bh,
748 : : create ? EXT4_GET_BLOCKS_CREATE : 0);
749 : : }
750 : :
751 : : /*
752 : : * `handle' can be NULL if create is zero
753 : : */
754 : 0 : struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
755 : : ext4_lblk_t block, int create, int *errp)
756 : : {
757 : : struct ext4_map_blocks map;
758 : : struct buffer_head *bh;
759 : : int fatal = 0, err;
760 : :
761 [ - + ]: 1703726 : J_ASSERT(handle != NULL || create == 0);
762 : :
763 : 1703726 : map.m_lblk = block;
764 : 1703726 : map.m_len = 1;
765 : 1703726 : err = ext4_map_blocks(handle, inode, &map,
766 : : create ? EXT4_GET_BLOCKS_CREATE : 0);
767 : :
768 : : /* ensure we send some value back into *errp */
769 : 1703726 : *errp = 0;
770 : :
771 [ - + ]: 1703726 : if (create && err == 0)
772 : : err = -ENOSPC; /* should never happen */
773 [ - + ]: 1703726 : if (err < 0)
774 : 0 : *errp = err;
775 [ + ]: 1703726 : if (err <= 0)
776 : : return NULL;
777 : :
778 : 1703727 : bh = sb_getblk(inode->i_sb, map.m_pblk);
779 [ - + ]: 1703715 : if (unlikely(!bh)) {
780 : 0 : *errp = -ENOMEM;
781 : 0 : return NULL;
782 : : }
783 [ + + ]: 1703715 : if (map.m_flags & EXT4_MAP_NEW) {
784 [ - + ]: 41954 : J_ASSERT(create != 0);
785 [ - + ]: 41954 : J_ASSERT(handle != NULL);
786 : :
787 : : /*
788 : : * Now that we do not always journal data, we should
789 : : * keep in mind whether this should always journal the
790 : : * new buffer as metadata. For now, regular file
791 : : * writes use ext4_get_block instead, so it's not a
792 : : * problem.
793 : : */
794 : : lock_buffer(bh);
795 : : BUFFER_TRACE(bh, "call get_create_access");
796 : 41954 : fatal = ext4_journal_get_create_access(handle, bh);
797 [ + - ][ + + ]: 41954 : if (!fatal && !buffer_uptodate(bh)) {
798 [ + - ]: 22498 : memset(bh->b_data, 0, inode->i_sb->s_blocksize);
799 : : set_buffer_uptodate(bh);
800 : : }
801 : 41954 : unlock_buffer(bh);
802 : : BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
803 : 41954 : err = ext4_handle_dirty_metadata(handle, inode, bh);
804 [ + - ]: 41954 : if (!fatal)
805 : : fatal = err;
806 : : } else {
807 : : BUFFER_TRACE(bh, "not a new buffer");
808 : : }
809 [ - + ]: 1703715 : if (fatal) {
810 : 0 : *errp = fatal;
811 : : brelse(bh);
812 : : bh = NULL;
813 : : }
814 : 1703725 : return bh;
815 : : }
816 : :
817 : 0 : struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
818 : : ext4_lblk_t block, int create, int *err)
819 : : {
820 : : struct buffer_head *bh;
821 : :
822 : 629227 : bh = ext4_getblk(handle, inode, block, create, err);
823 [ + - ]: 629225 : if (!bh)
824 : : return bh;
825 [ + + ]: 629225 : if (buffer_uptodate(bh))
826 : : return bh;
827 : 274 : ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
828 : 274 : wait_on_buffer(bh);
829 [ - + ]: 274 : if (buffer_uptodate(bh))
830 : : return bh;
831 : : put_bh(bh);
832 : 0 : *err = -EIO;
833 : 0 : return NULL;
834 : : }
835 : :
836 : 0 : int ext4_walk_page_buffers(handle_t *handle,
837 : : struct buffer_head *head,
838 : : unsigned from,
839 : : unsigned to,
840 : : int *partial,
841 : : int (*fn)(handle_t *handle,
842 : : struct buffer_head *bh))
843 : : {
844 : : struct buffer_head *bh;
845 : : unsigned block_start, block_end;
846 : 62961 : unsigned blocksize = head->b_size;
847 : : int err, ret = 0;
848 : : struct buffer_head *next;
849 : :
850 [ + + ]: 125922 : for (bh = head, block_start = 0;
851 [ + + ]: 83606 : ret == 0 && (bh != head || !block_start);
852 : : block_start = block_end, bh = next) {
853 : 62961 : next = bh->b_this_page;
854 : 62961 : block_end = block_start + blocksize;
855 [ - + ]: 62961 : if (block_end <= from || block_start >= to) {
856 [ # # ][ # # ]: 0 : if (partial && !buffer_uptodate(bh))
857 : 0 : *partial = 1;
858 : 0 : continue;
859 : : }
860 : 62961 : err = (*fn)(handle, bh);
861 [ + - ]: 62961 : if (!ret)
862 : : ret = err;
863 : : }
864 : 62961 : return ret;
865 : : }
866 : :
867 : : /*
868 : : * To preserve ordering, it is essential that the hole instantiation and
869 : : * the data write be encapsulated in a single transaction. We cannot
870 : : * close off a transaction and start a new one between the ext4_get_block()
871 : : * and the commit_write(). So doing the jbd2_journal_start at the start of
872 : : * prepare_write() is the right place.
873 : : *
874 : : * Also, this function can nest inside ext4_writepage(). In that case, we
875 : : * *know* that ext4_writepage() has generated enough buffer credits to do the
876 : : * whole page. So we won't block on the journal in that case, which is good,
877 : : * because the caller may be PF_MEMALLOC.
878 : : *
879 : : * By accident, ext4 can be reentered when a transaction is open via
880 : : * quota file writes. If we were to commit the transaction while thus
881 : : * reentered, there can be a deadlock - we would be holding a quota
882 : : * lock, and the commit would never complete if another thread had a
883 : : * transaction open and was blocking on the quota lock - a ranking
884 : : * violation.
885 : : *
886 : : * So what we do is to rely on the fact that jbd2_journal_stop/journal_start
887 : : * will _not_ run commit under these circumstances because handle->h_ref
888 : : * is elevated. We'll still have enough credits for the tiny quotafile
889 : : * write.
890 : : */
891 : 0 : int do_journal_get_write_access(handle_t *handle,
892 : : struct buffer_head *bh)
893 : : {
894 : : int dirty = buffer_dirty(bh);
895 : : int ret;
896 : :
897 [ + - ][ + - ]: 7555 : if (!buffer_mapped(bh) || buffer_freed(bh))
898 : : return 0;
899 : : /*
900 : : * __block_write_begin() could have dirtied some buffers. Clean
901 : : * the dirty bit as jbd2_journal_get_write_access() could complain
902 : : * otherwise about fs integrity issues. Setting of the dirty bit
903 : : * by __block_write_begin() isn't a real problem here as we clear
904 : : * the bit before releasing a page lock and thus writeback cannot
905 : : * ever write the buffer.
906 : : */
907 [ - + ]: 7555 : if (dirty)
908 : : clear_buffer_dirty(bh);
909 : 7555 : ret = ext4_journal_get_write_access(handle, bh);
910 [ - + ]: 7555 : if (!ret && dirty)
911 : 0 : ret = ext4_handle_dirty_metadata(handle, NULL, bh);
912 : 7555 : return ret;
913 : : }
914 : :
915 : : static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
916 : : struct buffer_head *bh_result, int create);
917 : 0 : static int ext4_write_begin(struct file *file, struct address_space *mapping,
918 : : loff_t pos, unsigned len, unsigned flags,
919 : : struct page **pagep, void **fsdata)
920 : : {
921 : 15110 : struct inode *inode = mapping->host;
922 : : int ret, needed_blocks;
923 : : handle_t *handle;
924 : 7555 : int retries = 0;
925 : : struct page *page;
926 : : pgoff_t index;
927 : : unsigned from, to;
928 : :
929 : : trace_ext4_write_begin(inode, pos, len, flags);
930 : : /*
931 : : * Reserve one block more for addition to orphan list in case
932 : : * we allocate blocks but write fails for some reason
933 : : */
934 : 7555 : needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
935 : 7555 : index = pos >> PAGE_CACHE_SHIFT;
936 : 7555 : from = pos & (PAGE_CACHE_SIZE - 1);
937 : 7555 : to = from + len;
938 : :
939 [ + - ]: 7555 : if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
940 : 0 : ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
941 : : flags, pagep);
942 [ # # ]: 0 : if (ret < 0)
943 : : return ret;
944 [ # # ]: 0 : if (ret == 1)
945 : : return 0;
946 : : }
947 : :
948 : : /*
949 : : * grab_cache_page_write_begin() can take a long time if the
950 : : * system is thrashing due to memory pressure, or if the page
951 : : * is being written back. So grab it first before we start
952 : : * the transaction handle. This also allows us to allocate
953 : : * the page (if needed) without using GFP_NOFS.
954 : : */
955 : : retry_grab:
956 : 7555 : page = grab_cache_page_write_begin(mapping, index, flags);
957 [ + - ]: 7555 : if (!page)
958 : : return -ENOMEM;
959 : 7555 : unlock_page(page);
960 : :
961 : : retry_journal:
962 : : handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
963 [ - + ]: 7555 : if (IS_ERR(handle)) {
964 : 0 : page_cache_release(page);
965 : 0 : return PTR_ERR(handle);
966 : : }
967 : :
968 : : lock_page(page);
969 [ - + ]: 7555 : if (page->mapping != mapping) {
970 : : /* The page got truncated from under us */
971 : 0 : unlock_page(page);
972 : 0 : page_cache_release(page);
973 : 0 : ext4_journal_stop(handle);
974 : 0 : goto retry_grab;
975 : : }
976 : : /* In case writeback began while the page was unlocked */
977 : 7555 : wait_for_stable_page(page);
978 : :
979 [ - + ]: 7555 : if (ext4_should_dioread_nolock(inode))
980 : 0 : ret = __block_write_begin(page, pos, len, ext4_get_block_write);
981 : : else
982 : 7555 : ret = __block_write_begin(page, pos, len, ext4_get_block);
983 : :
984 [ + - ][ + - ]: 22665 : if (!ret && ext4_should_journal_data(inode)) {
985 [ - + ]: 7555 : ret = ext4_walk_page_buffers(handle, page_buffers(page),
986 : : from, to, NULL,
987 : : do_journal_get_write_access);
988 : : }
989 : :
990 [ - + ]: 7555 : if (ret) {
991 : 0 : unlock_page(page);
992 : : /*
993 : : * __block_write_begin may have instantiated a few blocks
994 : : * outside i_size. Trim these off again. Don't need
995 : : * i_size_read because we hold i_mutex.
996 : : *
997 : : * Add inode to orphan list in case we crash before
998 : : * truncate finishes
999 : : */
1000 [ # # ][ # # ]: 0 : if (pos + len > inode->i_size && ext4_can_truncate(inode))
1001 : 0 : ext4_orphan_add(handle, inode);
1002 : :
1003 : 0 : ext4_journal_stop(handle);
1004 [ # # ]: 0 : if (pos + len > inode->i_size) {
1005 : : ext4_truncate_failed_write(inode);
1006 : : /*
1007 : : * If truncate failed early the inode might
1008 : : * still be on the orphan list; we need to
1009 : : * make sure the inode is removed from the
1010 : : * orphan list in that case.
1011 : : */
1012 [ # # ]: 0 : if (inode->i_nlink)
1013 : 0 : ext4_orphan_del(NULL, inode);
1014 : : }
1015 : :
1016 [ # # # # ]: 0 : if (ret == -ENOSPC &&
1017 : 0 : ext4_should_retry_alloc(inode->i_sb, &retries))
1018 : : goto retry_journal;
1019 : 0 : page_cache_release(page);
1020 : 0 : return ret;
1021 : : }
1022 : 7555 : *pagep = page;
1023 : 7555 : return ret;
1024 : : }
1025 : :
1026 : : /* For write_end() in data=journal mode */
1027 : 0 : static int write_end_fn(handle_t *handle, struct buffer_head *bh)
1028 : : {
1029 : : int ret;
1030 [ + - ][ + - ]: 7555 : if (!buffer_mapped(bh) || buffer_freed(bh))
1031 : : return 0;
1032 : : set_buffer_uptodate(bh);
1033 : 7555 : ret = ext4_handle_dirty_metadata(handle, NULL, bh);
1034 : : clear_buffer_meta(bh);
1035 : : clear_buffer_prio(bh);
1036 : 7555 : return ret;
1037 : : }
1038 : :
1039 : : /*
1040 : : * We need to pick up the new inode size which generic_commit_write gave us
1041 : : * `file' can be NULL - eg, when called from page_symlink().
1042 : : *
1043 : : * ext4 never places buffers on inode->i_mapping->private_list. metadata
1044 : : * buffers are managed internally.
1045 : : */
1046 : 0 : static int ext4_write_end(struct file *file,
1047 : : struct address_space *mapping,
1048 : : loff_t pos, unsigned len, unsigned copied,
1049 : : struct page *page, void *fsdata)
1050 : : {
1051 : : handle_t *handle = ext4_journal_current_handle();
1052 : 0 : struct inode *inode = mapping->host;
1053 : : int ret = 0, ret2;
1054 : : int i_size_changed = 0;
1055 : :
1056 : : trace_ext4_write_end(inode, pos, len, copied);
1057 [ # # ]: 0 : if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) {
1058 : : ret = ext4_jbd2_file_inode(handle, inode);
1059 [ # # ]: 0 : if (ret) {
1060 : 0 : unlock_page(page);
1061 : 0 : page_cache_release(page);
1062 : 0 : goto errout;
1063 : : }
1064 : : }
1065 : :
1066 [ # # ]: 0 : if (ext4_has_inline_data(inode)) {
1067 : 0 : ret = ext4_write_inline_data_end(inode, pos, len,
1068 : : copied, page);
1069 [ # # ]: 0 : if (ret < 0)
1070 : : goto errout;
1071 : 0 : copied = ret;
1072 : : } else
1073 : 0 : copied = block_write_end(file, mapping, pos,
1074 : : len, copied, page, fsdata);
1075 : :
1076 : : /*
1077 : : * No need to use i_size_read() here, the i_size
1078 : : * cannot change under us because we hole i_mutex.
1079 : : *
1080 : : * But it's important to update i_size while still holding page lock:
1081 : : * page writeout could otherwise come in and zero beyond i_size.
1082 : : */
1083 [ # # ]: 0 : if (pos + copied > inode->i_size) {
1084 : : i_size_write(inode, pos + copied);
1085 : : i_size_changed = 1;
1086 : : }
1087 : :
1088 [ # # ]: 0 : if (pos + copied > EXT4_I(inode)->i_disksize) {
1089 : : /* We need to mark inode dirty even if
1090 : : * new_i_size is less that inode->i_size
1091 : : * but greater than i_disksize. (hint delalloc)
1092 : : */
1093 : : ext4_update_i_disksize(inode, (pos + copied));
1094 : : i_size_changed = 1;
1095 : : }
1096 : 0 : unlock_page(page);
1097 : 0 : page_cache_release(page);
1098 : :
1099 : : /*
1100 : : * Don't mark the inode dirty under page lock. First, it unnecessarily
1101 : : * makes the holding time of page lock longer. Second, it forces lock
1102 : : * ordering of page lock and transaction start for journaling
1103 : : * filesystems.
1104 : : */
1105 [ # # ]: 0 : if (i_size_changed)
1106 : 0 : ext4_mark_inode_dirty(handle, inode);
1107 : :
1108 [ # # ][ # # ]: 0 : if (pos + len > inode->i_size && ext4_can_truncate(inode))
1109 : : /* if we have allocated more blocks and copied
1110 : : * less. We will have blocks allocated outside
1111 : : * inode->i_size. So truncate them
1112 : : */
1113 : 0 : ext4_orphan_add(handle, inode);
1114 : : errout:
1115 : 0 : ret2 = ext4_journal_stop(handle);
1116 [ # # ]: 0 : if (!ret)
1117 : : ret = ret2;
1118 : :
1119 [ # # ]: 0 : if (pos + len > inode->i_size) {
1120 : : ext4_truncate_failed_write(inode);
1121 : : /*
1122 : : * If truncate failed early the inode might still be
1123 : : * on the orphan list; we need to make sure the inode
1124 : : * is removed from the orphan list in that case.
1125 : : */
1126 [ # # ]: 0 : if (inode->i_nlink)
1127 : 0 : ext4_orphan_del(NULL, inode);
1128 : : }
1129 : :
1130 [ # # ]: 0 : return ret ? ret : copied;
1131 : : }
1132 : :
1133 : 0 : static int ext4_journalled_write_end(struct file *file,
1134 : : struct address_space *mapping,
1135 : : loff_t pos, unsigned len, unsigned copied,
1136 : : struct page *page, void *fsdata)
1137 : : {
1138 : : handle_t *handle = ext4_journal_current_handle();
1139 : 7555 : struct inode *inode = mapping->host;
1140 : : int ret = 0, ret2;
1141 : 7555 : int partial = 0;
1142 : : unsigned from, to;
1143 : : loff_t new_i_size;
1144 : :
1145 : : trace_ext4_journalled_write_end(inode, pos, len, copied);
1146 : 7555 : from = pos & (PAGE_CACHE_SIZE - 1);
1147 : 7555 : to = from + len;
1148 : :
1149 [ - + ]: 7555 : BUG_ON(!ext4_handle_valid(handle));
1150 : :
1151 [ - + ]: 7555 : if (ext4_has_inline_data(inode))
1152 : 0 : copied = ext4_write_inline_data_end(inode, pos, len,
1153 : : copied, page);
1154 : : else {
1155 [ - + ]: 7555 : if (copied < len) {
1156 [ # # ]: 0 : if (!PageUptodate(page))
1157 : : copied = 0;
1158 : 0 : page_zero_new_buffers(page, from+copied, to);
1159 : : }
1160 : :
1161 [ - + ]: 7555 : ret = ext4_walk_page_buffers(handle, page_buffers(page), from,
1162 : : to, &partial, write_end_fn);
1163 [ + - ]: 7555 : if (!partial)
1164 : : SetPageUptodate(page);
1165 : : }
1166 : 7555 : new_i_size = pos + copied;
1167 [ + - ]: 7555 : if (new_i_size > inode->i_size)
1168 : : i_size_write(inode, pos+copied);
1169 : : ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1170 : 7555 : EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
1171 [ + - ]: 7555 : if (new_i_size > EXT4_I(inode)->i_disksize) {
1172 : : ext4_update_i_disksize(inode, new_i_size);
1173 : 7555 : ret2 = ext4_mark_inode_dirty(handle, inode);
1174 [ + - ]: 7555 : if (!ret)
1175 : : ret = ret2;
1176 : : }
1177 : :
1178 : 7555 : unlock_page(page);
1179 : 7555 : page_cache_release(page);
1180 [ - + ][ # # ]: 7555 : if (pos + len > inode->i_size && ext4_can_truncate(inode))
1181 : : /* if we have allocated more blocks and copied
1182 : : * less. We will have blocks allocated outside
1183 : : * inode->i_size. So truncate them
1184 : : */
1185 : 0 : ext4_orphan_add(handle, inode);
1186 : :
1187 : 7555 : ret2 = ext4_journal_stop(handle);
1188 [ + - ]: 7555 : if (!ret)
1189 : : ret = ret2;
1190 [ - + ]: 7555 : if (pos + len > inode->i_size) {
1191 : : ext4_truncate_failed_write(inode);
1192 : : /*
1193 : : * If truncate failed early the inode might still be
1194 : : * on the orphan list; we need to make sure the inode
1195 : : * is removed from the orphan list in that case.
1196 : : */
1197 [ # # ]: 0 : if (inode->i_nlink)
1198 : 0 : ext4_orphan_del(NULL, inode);
1199 : : }
1200 : :
1201 [ + - ]: 7555 : return ret ? ret : copied;
1202 : : }
1203 : :
1204 : : /*
1205 : : * Reserve a metadata for a single block located at lblock
1206 : : */
1207 : 0 : static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
1208 : : {
1209 : 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1210 : : struct ext4_inode_info *ei = EXT4_I(inode);
1211 : : unsigned int md_needed;
1212 : : ext4_lblk_t save_last_lblock;
1213 : : int save_len;
1214 : :
1215 : : /*
1216 : : * recalculate the amount of metadata blocks to reserve
1217 : : * in order to allocate nrblocks
1218 : : * worse case is one extent per block
1219 : : */
1220 : : spin_lock(&ei->i_block_reservation_lock);
1221 : : /*
1222 : : * ext4_calc_metadata_amount() has side effects, which we have
1223 : : * to be prepared undo if we fail to claim space.
1224 : : */
1225 : 0 : save_len = ei->i_da_metadata_calc_len;
1226 : 0 : save_last_lblock = ei->i_da_metadata_calc_last_lblock;
1227 : 0 : md_needed = EXT4_NUM_B2C(sbi,
1228 : : ext4_calc_metadata_amount(inode, lblock));
1229 : 0 : trace_ext4_da_reserve_space(inode, md_needed);
1230 : :
1231 : : /*
1232 : : * We do still charge estimated metadata to the sb though;
1233 : : * we cannot afford to run out of free blocks.
1234 : : */
1235 [ # # ]: 0 : if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
1236 : 0 : ei->i_da_metadata_calc_len = save_len;
1237 : 0 : ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1238 : : spin_unlock(&ei->i_block_reservation_lock);
1239 : 0 : return -ENOSPC;
1240 : : }
1241 : 0 : ei->i_reserved_meta_blocks += md_needed;
1242 : : spin_unlock(&ei->i_block_reservation_lock);
1243 : :
1244 : 0 : return 0; /* success */
1245 : : }
1246 : :
1247 : : /*
1248 : : * Reserve a single cluster located at lblock
1249 : : */
1250 : 0 : static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1251 : : {
1252 : 1828638 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1253 : : struct ext4_inode_info *ei = EXT4_I(inode);
1254 : : unsigned int md_needed;
1255 : : int ret;
1256 : : ext4_lblk_t save_last_lblock;
1257 : : int save_len;
1258 : :
1259 : : /*
1260 : : * We will charge metadata quota at writeout time; this saves
1261 : : * us from metadata over-estimation, though we may go over by
1262 : : * a small amount in the end. Here we just reserve for data.
1263 : : */
1264 : 1828638 : ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
1265 [ + + ]: 1828668 : if (ret)
1266 : : return ret;
1267 : :
1268 : : /*
1269 : : * recalculate the amount of metadata blocks to reserve
1270 : : * in order to allocate nrblocks
1271 : : * worse case is one extent per block
1272 : : */
1273 : : spin_lock(&ei->i_block_reservation_lock);
1274 : : /*
1275 : : * ext4_calc_metadata_amount() has side effects, which we have
1276 : : * to be prepared undo if we fail to claim space.
1277 : : */
1278 : 1828680 : save_len = ei->i_da_metadata_calc_len;
1279 : 1828680 : save_last_lblock = ei->i_da_metadata_calc_last_lblock;
1280 : 1828680 : md_needed = EXT4_NUM_B2C(sbi,
1281 : : ext4_calc_metadata_amount(inode, lblock));
1282 : 1828269 : trace_ext4_da_reserve_space(inode, md_needed);
1283 : :
1284 : : /*
1285 : : * We do still charge estimated metadata to the sb though;
1286 : : * we cannot afford to run out of free blocks.
1287 : : */
1288 [ - + ]: 1828269 : if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
1289 : 0 : ei->i_da_metadata_calc_len = save_len;
1290 : 0 : ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1291 : : spin_unlock(&ei->i_block_reservation_lock);
1292 : 0 : dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
1293 : 0 : return -ENOSPC;
1294 : : }
1295 : 1828634 : ei->i_reserved_data_blocks++;
1296 : 1828634 : ei->i_reserved_meta_blocks += md_needed;
1297 : : spin_unlock(&ei->i_block_reservation_lock);
1298 : :
1299 : 1828649 : return 0; /* success */
1300 : : }
1301 : :
1302 : 0 : static void ext4_da_release_space(struct inode *inode, int to_free)
1303 : : {
1304 : 1178257 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1305 : : struct ext4_inode_info *ei = EXT4_I(inode);
1306 : :
1307 [ + + ]: 1178257 : if (!to_free)
1308 : 1178265 : return; /* Nothing to release, exit */
1309 : :
1310 : : spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1311 : :
1312 : : trace_ext4_da_release_space(inode, to_free);
1313 [ - + ]: 2356519 : if (unlikely(to_free > ei->i_reserved_data_blocks)) {
1314 : : /*
1315 : : * if there aren't enough reserved blocks, then the
1316 : : * counter is messed up somewhere. Since this
1317 : : * function is called from invalidate page, it's
1318 : : * harmless to return without any action.
1319 : : */
1320 : 0 : ext4_warning(inode->i_sb, "ext4_da_release_space: "
1321 : : "ino %lu, to_free %d with only %d reserved "
1322 : : "data blocks", inode->i_ino, to_free,
1323 : : ei->i_reserved_data_blocks);
1324 : 0 : WARN_ON(1);
1325 : 0 : to_free = ei->i_reserved_data_blocks;
1326 : : }
1327 : 1178262 : ei->i_reserved_data_blocks -= to_free;
1328 : :
1329 [ + + ]: 1178262 : if (ei->i_reserved_data_blocks == 0) {
1330 : : /*
1331 : : * We can release all of the reserved metadata blocks
1332 : : * only when we have written all of the delayed
1333 : : * allocation blocks.
1334 : : * Note that in case of bigalloc, i_reserved_meta_blocks,
1335 : : * i_reserved_data_blocks, etc. refer to number of clusters.
1336 : : */
1337 : 58764 : percpu_counter_sub(&sbi->s_dirtyclusters_counter,
1338 : 29382 : ei->i_reserved_meta_blocks);
1339 : 29382 : ei->i_reserved_meta_blocks = 0;
1340 : 29382 : ei->i_da_metadata_calc_len = 0;
1341 : : }
1342 : :
1343 : : /* update fs dirty data blocks counter */
1344 : 1178262 : percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
1345 : :
1346 : : spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1347 : :
1348 : 1178268 : dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
1349 : : }
1350 : :
1351 : 0 : static void ext4_da_page_release_reservation(struct page *page,
1352 : : unsigned int offset,
1353 : : unsigned int length)
1354 : : {
1355 : : int to_release = 0;
1356 : : struct buffer_head *head, *bh;
1357 : : unsigned int curr_off = 0;
1358 : 1713933 : struct inode *inode = page->mapping->host;
1359 : 1713933 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1360 : 1713933 : unsigned int stop = offset + length;
1361 : : int num_clusters;
1362 : : ext4_fsblk_t lblk;
1363 : :
1364 [ - + ]: 1713933 : BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
1365 : :
1366 [ - + ]: 1713933 : head = page_buffers(page);
1367 : : bh = head;
1368 : : do {
1369 : 1713918 : unsigned int next_off = curr_off + bh->b_size;
1370 : :
1371 [ + ]: 3427851 : if (next_off > stop)
1372 : : break;
1373 : :
1374 [ + + ][ + + ]: 1713937 : if ((offset <= curr_off) && (buffer_delay(bh))) {
1375 : 1178260 : to_release++;
1376 : : clear_buffer_delay(bh);
1377 : : }
1378 : : curr_off = next_off;
1379 [ + + ]: 3427859 : } while ((bh = bh->b_this_page) != head);
1380 : :
1381 [ + + ]: 1713922 : if (to_release) {
1382 : 1178268 : lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1383 : 1178268 : ext4_es_remove_extent(inode, lblk, to_release);
1384 : : }
1385 : :
1386 : : /* If we have released all the blocks belonging to a cluster, then we
1387 : : * need to release the reserved space for that cluster. */
1388 : 1713911 : num_clusters = EXT4_NUM_B2C(sbi, to_release);
1389 [ + + ]: 2892176 : while (num_clusters > 0) {
1390 : 2356512 : lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
1391 : 1178256 : ((num_clusters - 1) << sbi->s_cluster_bits);
1392 [ - + # # ]: 1178256 : if (sbi->s_cluster_ratio == 1 ||
1393 : 0 : !ext4_find_delalloc_cluster(inode, lblk))
1394 : 1178256 : ext4_da_release_space(inode, 1);
1395 : :
1396 : : num_clusters--;
1397 : : }
1398 : 1713920 : }
1399 : :
1400 : : /*
1401 : : * Delayed allocation stuff
1402 : : */
1403 : :
1404 : : struct mpage_da_data {
1405 : : struct inode *inode;
1406 : : struct writeback_control *wbc;
1407 : :
1408 : : pgoff_t first_page; /* The first page to write */
1409 : : pgoff_t next_page; /* Current page to examine */
1410 : : pgoff_t last_page; /* Last page to examine */
1411 : : /*
1412 : : * Extent to map - this can be after first_page because that can be
1413 : : * fully mapped. We somewhat abuse m_flags to store whether the extent
1414 : : * is delalloc or unwritten.
1415 : : */
1416 : : struct ext4_map_blocks map;
1417 : : struct ext4_io_submit io_submit; /* IO submission data */
1418 : : };
1419 : :
1420 : 0 : static void mpage_release_unused_pages(struct mpage_da_data *mpd,
1421 : : bool invalidate)
1422 : : {
1423 : : int nr_pages, i;
1424 : : pgoff_t index, end;
1425 : : struct pagevec pvec;
1426 : 154700 : struct inode *inode = mpd->inode;
1427 : 154700 : struct address_space *mapping = inode->i_mapping;
1428 : :
1429 : : /* This is necessary when next_page == 0. */
1430 [ + + ]: 154700 : if (mpd->first_page >= mpd->next_page)
1431 : 153355 : return;
1432 : :
1433 : : index = mpd->first_page;
1434 : 1345 : end = mpd->next_page - 1;
1435 [ - + ]: 1345 : if (invalidate) {
1436 : : ext4_lblk_t start, last;
1437 : 0 : start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1438 : 0 : last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1439 : 0 : ext4_es_remove_extent(inode, start, last - start + 1);
1440 : : }
1441 : :
1442 : : pagevec_init(&pvec, 0);
1443 [ + + ]: 3804 : while (index <= end) {
1444 : 2459 : nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
1445 [ + - ]: 2459 : if (nr_pages == 0)
1446 : : break;
1447 [ + + ]: 19827 : for (i = 0; i < nr_pages; i++) {
1448 : 18692 : struct page *page = pvec.pages[i];
1449 [ + + ]: 18692 : if (page->index > end)
1450 : : break;
1451 [ - + ]: 17368 : BUG_ON(!PageLocked(page));
1452 [ - + ]: 17368 : BUG_ON(PageWriteback(page));
1453 [ - + ]: 17368 : if (invalidate) {
1454 : 0 : block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
1455 : : ClearPageUptodate(page);
1456 : : }
1457 : 17368 : unlock_page(page);
1458 : : }
1459 : 2459 : index = pvec.pages[nr_pages - 1]->index + 1;
1460 : : pagevec_release(&pvec);
1461 : : }
1462 : : }
1463 : :
1464 : 0 : static void ext4_print_free_blocks(struct inode *inode)
1465 : : {
1466 : 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1467 : 0 : struct super_block *sb = inode->i_sb;
1468 : : struct ext4_inode_info *ei = EXT4_I(inode);
1469 : :
1470 : 0 : ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld",
1471 : : EXT4_C2B(EXT4_SB(inode->i_sb),
1472 : : ext4_count_free_clusters(sb)));
1473 : 0 : ext4_msg(sb, KERN_CRIT, "Free/Dirty block details");
1474 : 0 : ext4_msg(sb, KERN_CRIT, "free_blocks=%lld",
1475 : : (long long) EXT4_C2B(EXT4_SB(sb),
1476 : : percpu_counter_sum(&sbi->s_freeclusters_counter)));
1477 : 0 : ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld",
1478 : : (long long) EXT4_C2B(EXT4_SB(sb),
1479 : : percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
1480 : 0 : ext4_msg(sb, KERN_CRIT, "Block reservation details");
1481 : 0 : ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
1482 : : ei->i_reserved_data_blocks);
1483 : 0 : ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u",
1484 : : ei->i_reserved_meta_blocks);
1485 : 0 : ext4_msg(sb, KERN_CRIT, "i_allocated_meta_blocks=%u",
1486 : : ei->i_allocated_meta_blocks);
1487 : 0 : return;
1488 : : }
1489 : :
1490 : 0 : static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
1491 : : {
1492 [ + + ][ + ]: 47851 : return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
[ - + ]
1493 : : }
1494 : :
1495 : : /*
1496 : : * This function is grabs code from the very beginning of
1497 : : * ext4_map_blocks, but assumes that the caller is from delayed write
1498 : : * time. This function looks up the requested blocks and sets the
1499 : : * buffer delay bit under the protection of i_data_sem.
1500 : : */
1501 : 0 : static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1502 : : struct ext4_map_blocks *map,
1503 : : struct buffer_head *bh)
1504 : : {
1505 : : struct extent_status es;
1506 : : int retval;
1507 : : sector_t invalid_block = ~((sector_t) 0xffff);
1508 : : #ifdef ES_AGGRESSIVE_TEST
1509 : : struct ext4_map_blocks orig_map;
1510 : :
1511 : : memcpy(&orig_map, map, sizeof(*map));
1512 : : #endif
1513 : :
1514 [ - + ]: 1828552 : if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
1515 : : invalid_block = ~0;
1516 : :
1517 : 1828552 : map->m_flags = 0;
1518 : : ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
1519 : : "logical block %lu\n", inode->i_ino, map->m_len,
1520 : : (unsigned long) map->m_lblk);
1521 : :
1522 : : /* Lookup extent status tree firstly */
1523 [ + + ]: 1828552 : if (ext4_es_lookup_extent(inode, iblock, &es)) {
1524 : 71452 : ext4_es_lru_add(inode);
1525 [ + + ]: 1899948 : if (ext4_es_is_hole(&es)) {
1526 : : retval = 0;
1527 : 71385 : down_read((&EXT4_I(inode)->i_data_sem));
1528 : 71454 : goto add_delayed;
1529 : : }
1530 : :
1531 : : /*
1532 : : * Delayed extent could be allocated by fallocate.
1533 : : * So we need to check it.
1534 : : */
1535 [ - + ][ # # ]: 1828563 : if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
1536 : 0 : map_bh(bh, inode->i_sb, invalid_block);
1537 : : set_buffer_new(bh);
1538 : : set_buffer_delay(bh);
1539 : 0 : return 0;
1540 : : }
1541 : :
1542 : 11 : map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
1543 : 11 : retval = es.es_len - (iblock - es.es_lblk);
1544 [ - + ]: 11 : if (retval > map->m_len)
1545 : 0 : retval = map->m_len;
1546 : 11 : map->m_len = retval;
1547 [ + + ]: 11 : if (ext4_es_is_written(&es))
1548 : 9 : map->m_flags |= EXT4_MAP_MAPPED;
1549 [ + - ]: 2 : else if (ext4_es_is_unwritten(&es))
1550 : 2 : map->m_flags |= EXT4_MAP_UNWRITTEN;
1551 : : else
1552 : 0 : BUG_ON(1);
1553 : :
1554 : : #ifdef ES_AGGRESSIVE_TEST
1555 : : ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
1556 : : #endif
1557 : 11 : return retval;
1558 : : }
1559 : :
1560 : : /*
1561 : : * Try to see if we can get the block without requesting a new
1562 : : * file system block.
1563 : : */
1564 : 1757239 : down_read((&EXT4_I(inode)->i_data_sem));
1565 [ - + ]: 1757244 : if (ext4_has_inline_data(inode)) {
1566 : : /*
1567 : : * We will soon create blocks for this page, and let
1568 : : * us pretend as if the blocks aren't allocated yet.
1569 : : * In case of clusters, we have to handle the work
1570 : : * of mapping from cluster so that the reserved space
1571 : : * is calculated properly.
1572 : : */
1573 [ # # # # ]: 0 : if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) &&
1574 : 0 : ext4_find_delalloc_cluster(inode, map->m_lblk))
1575 : 0 : map->m_flags |= EXT4_MAP_FROM_CLUSTER;
1576 : : retval = 0;
1577 [ + - ]: 1757236 : } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
1578 : 1757236 : retval = ext4_ext_map_blocks(NULL, inode, map,
1579 : : EXT4_GET_BLOCKS_NO_PUT_HOLE);
1580 : : else
1581 : 0 : retval = ext4_ind_map_blocks(NULL, inode, map,
1582 : : EXT4_GET_BLOCKS_NO_PUT_HOLE);
1583 : :
1584 : : add_delayed:
1585 [ + + ]: 1828385 : if (retval == 0) {
1586 : : int ret;
1587 : : /*
1588 : : * XXX: __block_prepare_write() unmaps passed block,
1589 : : * is it OK?
1590 : : */
1591 : : /*
1592 : : * If the block was allocated from previously allocated cluster,
1593 : : * then we don't need to reserve it again. However we still need
1594 : : * to reserve metadata for every block we're going to write.
1595 : : */
1596 [ + - ]: 1828374 : if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
1597 : 1828374 : ret = ext4_da_reserve_space(inode, iblock);
1598 [ + - ]: 1828683 : if (ret) {
1599 : : /* not enough space to reserve */
1600 : : retval = ret;
1601 : : goto out_unlock;
1602 : : }
1603 : : } else {
1604 : 0 : ret = ext4_da_reserve_metadata(inode, iblock);
1605 [ # # ]: 0 : if (ret) {
1606 : : /* not enough space to reserve */
1607 : : retval = ret;
1608 : : goto out_unlock;
1609 : : }
1610 : : }
1611 : :
1612 : 1828683 : ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
1613 : : ~0, EXTENT_STATUS_DELAYED);
1614 [ + + ]: 1828664 : if (ret) {
1615 : : retval = ret;
1616 : : goto out_unlock;
1617 : : }
1618 : :
1619 : : /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
1620 : : * and it should not appear on the bh->b_state.
1621 : : */
1622 : 1828641 : map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
1623 : :
1624 : 1828641 : map_bh(bh, inode->i_sb, invalid_block);
1625 : : set_buffer_new(bh);
1626 : : set_buffer_delay(bh);
1627 [ + - ]: 11 : } else if (retval > 0) {
1628 : : int ret;
1629 : : unsigned int status;
1630 : :
1631 [ - + ]: 11 : if (unlikely(retval != map->m_len)) {
1632 : 0 : ext4_warning(inode->i_sb,
1633 : : "ES len assertion failed for inode "
1634 : : "%lu: retval %d != map->m_len %d",
1635 : : inode->i_ino, retval, map->m_len);
1636 : 0 : WARN_ON(1);
1637 : : }
1638 : :
1639 [ + - ]: 11 : status = map->m_flags & EXT4_MAP_UNWRITTEN ?
1640 : : EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
1641 : 11 : ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
1642 : : map->m_pblk, status);
1643 [ - + ]: 11 : if (ret != 0)
1644 : : retval = ret;
1645 : : }
1646 : :
1647 : : out_unlock:
1648 : 1828632 : up_read((&EXT4_I(inode)->i_data_sem));
1649 : :
1650 : 1828701 : return retval;
1651 : : }
1652 : :
1653 : : /*
1654 : : * This is a special get_blocks_t callback which is used by
1655 : : * ext4_da_write_begin(). It will either return mapped block or
1656 : : * reserve space for a single block.
1657 : : *
1658 : : * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
1659 : : * We also have b_blocknr = -1 and b_bdev initialized properly
1660 : : *
1661 : : * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
1662 : : * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
1663 : : * initialized properly.
1664 : : */
1665 : 0 : int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1666 : : struct buffer_head *bh, int create)
1667 : : {
1668 : : struct ext4_map_blocks map;
1669 : : int ret = 0;
1670 : :
1671 [ - + ]: 1828292 : BUG_ON(create == 0);
1672 [ - + ]: 1828292 : BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
1673 : :
1674 : 1828292 : map.m_lblk = iblock;
1675 : 1828292 : map.m_len = 1;
1676 : :
1677 : : /*
1678 : : * first, we need to know whether the block is allocated already
1679 : : * preallocated blocks are unmapped but should treated
1680 : : * the same as allocated blocks.
1681 : : */
1682 : 1828292 : ret = ext4_da_map_blocks(inode, iblock, &map, bh);
1683 [ + + ]: 1828623 : if (ret <= 0)
1684 : : return ret;
1685 : :
1686 : 22 : map_bh(bh, inode->i_sb, map.m_pblk);
1687 : 22 : bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
1688 : :
1689 [ + + ]: 22 : if (buffer_unwritten(bh)) {
1690 : : /* A delayed write to unwritten bh should be marked
1691 : : * new and mapped. Mapped ensures that we don't do
1692 : : * get_block multiple times when we write to the same
1693 : : * offset and new ensures that we do proper zero out
1694 : : * for partial write.
1695 : : */
1696 : : set_buffer_new(bh);
1697 : : set_buffer_mapped(bh);
1698 : : }
1699 : : return 0;
1700 : : }
1701 : :
1702 : 0 : static int bget_one(handle_t *handle, struct buffer_head *bh)
1703 : : {
1704 : : get_bh(bh);
1705 : 0 : return 0;
1706 : : }
1707 : :
1708 : 0 : static int bput_one(handle_t *handle, struct buffer_head *bh)
1709 : : {
1710 : : put_bh(bh);
1711 : 0 : return 0;
1712 : : }
1713 : :
1714 : 0 : static int __ext4_journalled_writepage(struct page *page,
1715 : : unsigned int len)
1716 : : {
1717 : 0 : struct address_space *mapping = page->mapping;
1718 : 0 : struct inode *inode = mapping->host;
1719 : : struct buffer_head *page_bufs = NULL;
1720 : : handle_t *handle = NULL;
1721 : : int ret = 0, err = 0;
1722 : 0 : int inline_data = ext4_has_inline_data(inode);
1723 : : struct buffer_head *inode_bh = NULL;
1724 : :
1725 : : ClearPageChecked(page);
1726 : :
1727 [ # # ]: 0 : if (inline_data) {
1728 [ # # ]: 0 : BUG_ON(page->index != 0);
1729 [ # # ]: 0 : BUG_ON(len > ext4_get_max_inline_size(inode));
1730 : 0 : inode_bh = ext4_journalled_write_inline_data(inode, len, page);
1731 [ # # ]: 0 : if (inode_bh == NULL)
1732 : : goto out;
1733 : : } else {
1734 [ # # ]: 0 : page_bufs = page_buffers(page);
1735 [ # # ]: 0 : if (!page_bufs) {
1736 : 0 : BUG();
1737 : : goto out;
1738 : : }
1739 : 0 : ext4_walk_page_buffers(handle, page_bufs, 0, len,
1740 : : NULL, bget_one);
1741 : : }
1742 : : /* As soon as we unlock the page, it can go away, but we have
1743 : : * references to buffers so we are safe */
1744 : 0 : unlock_page(page);
1745 : :
1746 : 0 : handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
1747 : : ext4_writepage_trans_blocks(inode));
1748 [ # # ]: 0 : if (IS_ERR(handle)) {
1749 : : ret = PTR_ERR(handle);
1750 : 0 : goto out;
1751 : : }
1752 : :
1753 [ # # ]: 0 : BUG_ON(!ext4_handle_valid(handle));
1754 : :
1755 [ # # ]: 0 : if (inline_data) {
1756 : 0 : ret = ext4_journal_get_write_access(handle, inode_bh);
1757 : :
1758 : 0 : err = ext4_handle_dirty_metadata(handle, inode, inode_bh);
1759 : :
1760 : : } else {
1761 : 0 : ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL,
1762 : : do_journal_get_write_access);
1763 : :
1764 : 0 : err = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL,
1765 : : write_end_fn);
1766 : : }
1767 [ # # ]: 0 : if (ret == 0)
1768 : : ret = err;
1769 : 0 : EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
1770 : 0 : err = ext4_journal_stop(handle);
1771 [ # # ]: 0 : if (!ret)
1772 : : ret = err;
1773 : :
1774 [ # # ]: 0 : if (!ext4_has_inline_data(inode))
1775 : 0 : ext4_walk_page_buffers(handle, page_bufs, 0, len,
1776 : : NULL, bput_one);
1777 : : ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1778 : : out:
1779 : : brelse(inode_bh);
1780 : 0 : return ret;
1781 : : }
1782 : :
1783 : : /*
1784 : : * Note that we don't need to start a transaction unless we're journaling data
1785 : : * because we should have holes filled from ext4_page_mkwrite(). We even don't
1786 : : * need to file the inode to the transaction's list in ordered mode because if
1787 : : * we are writing back data added by write(), the inode is already there and if
1788 : : * we are writing back data modified via mmap(), no one guarantees in which
1789 : : * transaction the data will hit the disk. In case we are journaling data, we
1790 : : * cannot start transaction directly because transaction start ranks above page
1791 : : * lock so we have to do some magic.
1792 : : *
1793 : : * This function can get called via...
1794 : : * - ext4_writepages after taking page lock (have journal handle)
1795 : : * - journal_submit_inode_data_buffers (no journal handle)
1796 : : * - shrink_page_list via the kswapd/direct reclaim (no journal handle)
1797 : : * - grab_page_cache when doing write_begin (have journal handle)
1798 : : *
1799 : : * We don't do any block allocation in this function. If we have page with
1800 : : * multiple blocks we need to write those buffer_heads that are mapped. This
1801 : : * is important for mmaped based write. So if we do with blocksize 1K
1802 : : * truncate(f, 1024);
1803 : : * a = mmap(f, 0, 4096);
1804 : : * a[0] = 'a';
1805 : : * truncate(f, 4096);
1806 : : * we have in the page first buffer_head mapped via page_mkwrite call back
1807 : : * but other buffer_heads would be unmapped but dirty (dirty done via the
1808 : : * do_wp_page). So writepage should write the first block. If we modify
1809 : : * the mmap area beyond 1024 we will again get a page_fault and the
1810 : : * page_mkwrite callback will do the block allocation and mark the
1811 : : * buffer_heads mapped.
1812 : : *
1813 : : * We redirty the page if we have any buffer_heads that is either delay or
1814 : : * unwritten in the page.
1815 : : *
1816 : : * We can get recursively called as show below.
1817 : : *
1818 : : * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
1819 : : * ext4_writepage()
1820 : : *
1821 : : * But since we don't do any block allocation we should not deadlock.
1822 : : * Page also have the dirty flag cleared so we don't get recurive page_lock.
1823 : : */
1824 : 0 : static int ext4_writepage(struct page *page,
1825 : : struct writeback_control *wbc)
1826 : : {
1827 : : int ret = 0;
1828 : : loff_t size;
1829 : : unsigned int len;
1830 : : struct buffer_head *page_bufs = NULL;
1831 : 47851 : struct inode *inode = page->mapping->host;
1832 : : struct ext4_io_submit io_submit;
1833 : :
1834 : : trace_ext4_writepage(page);
1835 : : size = i_size_read(inode);
1836 [ + + ]: 47851 : if (page->index == size >> PAGE_CACHE_SHIFT)
1837 : 5780 : len = size & ~PAGE_CACHE_MASK;
1838 : : else
1839 : : len = PAGE_CACHE_SIZE;
1840 : :
1841 [ - + ]: 47851 : page_bufs = page_buffers(page);
1842 : : /*
1843 : : * We cannot do block allocation or other extent handling in this
1844 : : * function. If there are buffers needing that, we have to redirty
1845 : : * the page. But we may reach here when we do a journal commit via
1846 : : * journal_submit_inode_data_buffers() and in that case we must write
1847 : : * allocated buffers to achieve data=ordered mode guarantees.
1848 : : */
1849 [ + + ]: 47851 : if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
1850 : : ext4_bh_delay_or_unwritten)) {
1851 : 42316 : redirty_page_for_writepage(wbc, page);
1852 [ + + ]: 42316 : if (current->flags & PF_MEMALLOC) {
1853 : : /*
1854 : : * For memory cleaning there's no point in writing only
1855 : : * some buffers. So just bail out. Warn if we came here
1856 : : * from direct reclaim.
1857 : : */
1858 [ - + ][ # # ]: 21 : WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD))
[ # # ]
1859 : : == PF_MEMALLOC);
1860 : 21 : unlock_page(page);
1861 : 21 : return 0;
1862 : : }
1863 : : }
1864 : :
1865 [ - + ][ # # ]: 47830 : if (PageChecked(page) && ext4_should_journal_data(inode))
1866 : : /*
1867 : : * It's mmapped pagecache. Add buffers and journal it. There
1868 : : * doesn't seem much point in redirtying the page here.
1869 : : */
1870 : 0 : return __ext4_journalled_writepage(page, len);
1871 : :
1872 : 47830 : ext4_io_submit_init(&io_submit, wbc);
1873 : 47830 : io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
1874 [ - + ]: 47830 : if (!io_submit.io_end) {
1875 : 0 : redirty_page_for_writepage(wbc, page);
1876 : 0 : unlock_page(page);
1877 : 0 : return -ENOMEM;
1878 : : }
1879 : 47830 : ret = ext4_bio_write_page(&io_submit, page, len, wbc);
1880 : 47830 : ext4_io_submit(&io_submit);
1881 : : /* Drop io_end reference we got from init */
1882 : 47830 : ext4_put_io_end_defer(io_submit.io_end);
1883 : 47830 : return ret;
1884 : : }
1885 : :
1886 : 0 : static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
1887 : : {
1888 : : int len;
1889 : 915228 : loff_t size = i_size_read(mpd->inode);
1890 : : int err;
1891 : :
1892 [ - + ]: 914715 : BUG_ON(page->index != mpd->first_page);
1893 [ + + ]: 914715 : if (page->index == size >> PAGE_CACHE_SHIFT)
1894 : 22788 : len = size & ~PAGE_CACHE_MASK;
1895 : : else
1896 : : len = PAGE_CACHE_SIZE;
1897 : 914715 : clear_page_dirty_for_io(page);
1898 : 914753 : err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
1899 [ + ]: 914737 : if (!err)
1900 : 914741 : mpd->wbc->nr_to_write--;
1901 : 914737 : mpd->first_page++;
1902 : :
1903 : 914737 : return err;
1904 : : }
1905 : :
1906 : : #define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))
1907 : :
1908 : : /*
1909 : : * mballoc gives us at most this number of blocks...
1910 : : * XXX: That seems to be only a limitation of ext4_mb_normalize_request().
1911 : : * The rest of mballoc seems to handle chunks up to full group size.
1912 : : */
1913 : : #define MAX_WRITEPAGES_EXTENT_LEN 2048
1914 : :
1915 : : /*
1916 : : * mpage_add_bh_to_extent - try to add bh to extent of blocks to map
1917 : : *
1918 : : * @mpd - extent of blocks
1919 : : * @lblk - logical number of the block in the file
1920 : : * @bh - buffer head we want to add to the extent
1921 : : *
1922 : : * The function is used to collect contig. blocks in the same state. If the
1923 : : * buffer doesn't require mapping for writeback and we haven't started the
1924 : : * extent of buffers to map yet, the function returns 'true' immediately - the
1925 : : * caller can write the buffer right away. Otherwise the function returns true
1926 : : * if the block has been added to the extent, false if the block couldn't be
1927 : : * added.
1928 : : */
1929 : 0 : static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
1930 : : struct buffer_head *bh)
1931 : : {
1932 : : struct ext4_map_blocks *map = &mpd->map;
1933 : :
1934 : : /* Buffer that doesn't need mapping for writeback? */
1935 [ + + ][ + ]: 932056 : if (!buffer_dirty(bh) || !buffer_mapped(bh) ||
[ + + ]
1936 [ + ]: 265975 : (!buffer_delay(bh) && !buffer_unwritten(bh))) {
1937 : : /* So far no extent to map => we write the buffer right away */
1938 [ + ]: 266017 : if (map->m_len == 0)
1939 : : return true;
1940 : 1259 : return false;
1941 : : }
1942 : :
1943 : : /* First block in the extent? */
1944 [ + + ]: 666039 : if (map->m_len == 0) {
1945 : 97724 : map->m_lblk = lblk;
1946 : 97724 : map->m_len = 1;
1947 : 97724 : map->m_flags = bh->b_state & BH_FLAGS;
1948 : 97724 : return true;
1949 : : }
1950 : :
1951 : : /* Don't go larger than mballoc is willing to allocate */
1952 [ + + ]: 568315 : if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN)
1953 : : return false;
1954 : :
1955 : : /* Can we merge the block to our big extent? */
1956 [ + - ][ + - ]: 568287 : if (lblk == map->m_lblk + map->m_len &&
1957 : 568287 : (bh->b_state & BH_FLAGS) == map->m_flags) {
1958 : 568287 : map->m_len++;
1959 : 568287 : return true;
1960 : : }
1961 : : return false;
1962 : : }
1963 : :
1964 : : /*
1965 : : * mpage_process_page_bufs - submit page buffers for IO or add them to extent
1966 : : *
1967 : : * @mpd - extent of blocks for mapping
1968 : : * @head - the first buffer in the page
1969 : : * @bh - buffer we should start processing from
1970 : : * @lblk - logical number of the block in the file corresponding to @bh
1971 : : *
1972 : : * Walk through page buffers from @bh upto @head (exclusive) and either submit
1973 : : * the page for IO if all buffers in this page were mapped and there's no
1974 : : * accumulated extent of buffers to map or add buffers in the page to the
1975 : : * extent of buffers to map. The function returns 1 if the caller can continue
1976 : : * by processing the next page, 0 if it should stop adding buffers to the
1977 : : * extent to map because we cannot extend it anymore. It can also return value
1978 : : * < 0 in case of error during IO submission.
1979 : : */
1980 : 0 : static int mpage_process_page_bufs(struct mpage_da_data *mpd,
1981 : : struct buffer_head *head,
1982 : : struct buffer_head *bh,
1983 : : ext4_lblk_t lblk)
1984 : : {
1985 : 933111 : struct inode *inode = mpd->inode;
1986 : : int err;
1987 : 932114 : ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
1988 : 932114 : >> inode->i_blkbits;
1989 : :
1990 : : do {
1991 [ - + ]: 932103 : BUG_ON(buffer_locked(bh));
1992 : :
1993 [ + + ][ + + ]: 932103 : if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) {
1994 : : /* Found extent to map? */
1995 [ + ]: 1381 : if (mpd->map.m_len)
1996 : : return 0;
1997 : : /* Everything mapped so far and we hit EOF */
1998 : : break;
1999 : : }
2000 [ + ]: 930694 : } while (lblk++, (bh = bh->b_this_page) != head);
2001 : : /* So far everything mapped? Submit the page for IO. */
2002 [ + + ]: 1862900 : if (mpd->map.m_len == 0) {
2003 : 264795 : err = mpage_submit_page(mpd, head->b_page);
2004 [ + + ]: 264800 : if (err < 0)
2005 : : return err;
2006 : : }
2007 : 930796 : return lblk < blocks;
2008 : : }
2009 : :
2010 : : /*
2011 : : * mpage_map_buffers - update buffers corresponding to changed extent and
2012 : : * submit fully mapped pages for IO
2013 : : *
2014 : : * @mpd - description of extent to map, on return next extent to map
2015 : : *
2016 : : * Scan buffers corresponding to changed extent (we expect corresponding pages
2017 : : * to be already locked) and update buffer state according to new extent state.
2018 : : * We map delalloc buffers to their physical location, clear unwritten bits,
2019 : : * and mark buffers as uninit when we perform writes to uninitialized extents
2020 : : * and do extent conversion after IO is finished. If the last page is not fully
2021 : : * mapped, we update @map to the next extent in the last page that needs
2022 : : * mapping. Otherwise we submit the page for IO.
2023 : : */
2024 : 0 : static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
2025 : : {
2026 : : struct pagevec pvec;
2027 : : int nr_pages, i;
2028 : 97678 : struct inode *inode = mpd->inode;
2029 : : struct buffer_head *head, *bh;
2030 : 97678 : int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
2031 : : pgoff_t start, end;
2032 : : ext4_lblk_t lblk;
2033 : : sector_t pblock;
2034 : : int err;
2035 : :
2036 : 97678 : start = mpd->map.m_lblk >> bpp_bits;
2037 : 97678 : end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits;
2038 : 97678 : lblk = start << bpp_bits;
2039 : 97678 : pblock = mpd->map.m_pblk;
2040 : :
2041 : : pagevec_init(&pvec, 0);
2042 [ + + ]: 230570 : while (start <= end) {
2043 : 132874 : nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start,
2044 : : PAGEVEC_SIZE);
2045 [ + ]: 132892 : if (nr_pages == 0)
2046 : : break;
2047 [ + + ]: 782834 : for (i = 0; i < nr_pages; i++) {
2048 : 724272 : struct page *page = pvec.pages[i];
2049 : :
2050 [ + + ]: 724272 : if (page->index > end)
2051 : : break;
2052 : : /* Up to 'end' pages must be contiguous */
2053 [ - + ]: 649941 : BUG_ON(page->index != start);
2054 [ - + ]: 649941 : bh = head = page_buffers(page);
2055 : : do {
2056 [ - + ]: 649937 : if (lblk < mpd->map.m_lblk)
2057 : 0 : continue;
2058 [ - + ]: 649937 : if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
2059 : : /*
2060 : : * Buffer after end of mapped extent.
2061 : : * Find next buffer in the page to map.
2062 : : */
2063 : 0 : mpd->map.m_len = 0;
2064 : 0 : mpd->map.m_flags = 0;
2065 : : /*
2066 : : * FIXME: If dioread_nolock supports
2067 : : * blocksize < pagesize, we need to make
2068 : : * sure we add size mapped so far to
2069 : : * io_end->size as the following call
2070 : : * can submit the page for IO.
2071 : : */
2072 : 0 : err = mpage_process_page_bufs(mpd, head,
2073 : : bh, lblk);
2074 : : pagevec_release(&pvec);
2075 [ # # ]: 0 : if (err > 0)
2076 : : err = 0;
2077 : 0 : return err;
2078 : : }
2079 [ + + ]: 649937 : if (buffer_delay(bh)) {
2080 : : clear_buffer_delay(bh);
2081 : 649934 : bh->b_blocknr = pblock++;
2082 : : }
2083 : : clear_buffer_unwritten(bh);
2084 [ + ]: 649926 : } while (lblk++, (bh = bh->b_this_page) != head);
2085 : :
2086 : : /*
2087 : : * FIXME: This is going to break if dioread_nolock
2088 : : * supports blocksize < pagesize as we will try to
2089 : : * convert potentially unmapped parts of inode.
2090 : : */
2091 : 649930 : mpd->io_submit.io_end->size += PAGE_CACHE_SIZE;
2092 : : /* Page fully mapped - let IO run! */
2093 : 649930 : err = mpage_submit_page(mpd, page);
2094 [ - + ]: 649941 : if (err < 0) {
2095 : : pagevec_release(&pvec);
2096 : 0 : return err;
2097 : : }
2098 : 649941 : start++;
2099 : : }
2100 : : pagevec_release(&pvec);
2101 : : }
2102 : : /* Extent fully mapped and matches with page boundary. We are done. */
2103 : 97695 : mpd->map.m_len = 0;
2104 : 97695 : mpd->map.m_flags = 0;
2105 : 97695 : return 0;
2106 : : }
2107 : :
2108 : 0 : static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
2109 : : {
2110 : 97723 : struct inode *inode = mpd->inode;
2111 : 97723 : struct ext4_map_blocks *map = &mpd->map;
2112 : : int get_blocks_flags;
2113 : : int err;
2114 : :
2115 : : trace_ext4_da_write_pages_extent(inode, map);
2116 : : /*
2117 : : * Call ext4_map_blocks() to allocate any delayed allocation blocks, or
2118 : : * to convert an uninitialized extent to be initialized (in the case
2119 : : * where we have written into one or more preallocated blocks). It is
2120 : : * possible that we're going to need more metadata blocks than
2121 : : * previously reserved. However we must not fail because we're in
2122 : : * writeback and there is nothing we can do about it so it might result
2123 : : * in data loss. So use reserved blocks to allocate metadata if
2124 : : * possible.
2125 : : *
2126 : : * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks
2127 : : * in question are delalloc blocks. This affects functions in many
2128 : : * different parts of the allocation call path. This flag exists
2129 : : * primarily because we don't want to change *many* call functions, so
2130 : : * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag
2131 : : * once the inode's allocation semaphore is taken.
2132 : : */
2133 : : get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
2134 : : EXT4_GET_BLOCKS_METADATA_NOFAIL;
2135 [ - + ]: 97725 : if (ext4_should_dioread_nolock(inode))
2136 : : get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
2137 [ + - ]: 97725 : if (map->m_flags & (1 << BH_Delay))
2138 : 97725 : get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
2139 : :
2140 : 97725 : err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
2141 [ + + ]: 97723 : if (err < 0)
2142 : : return err;
2143 [ - + ]: 97703 : if (map->m_flags & EXT4_MAP_UNINIT) {
2144 [ # # ][ # # ]: 0 : if (!mpd->io_submit.io_end->handle &&
2145 : : ext4_handle_valid(handle)) {
2146 : 0 : mpd->io_submit.io_end->handle = handle->h_rsv_handle;
2147 : 0 : handle->h_rsv_handle = NULL;
2148 : : }
2149 : 0 : ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end);
2150 : : }
2151 : :
2152 [ - + ]: 97693 : BUG_ON(map->m_len == 0);
2153 [ + + ]: 97693 : if (map->m_flags & EXT4_MAP_NEW) {
2154 : 97690 : struct block_device *bdev = inode->i_sb->s_bdev;
2155 : : int i;
2156 : :
2157 [ + + ]: 747586 : for (i = 0; i < map->m_len; i++)
2158 : 649897 : unmap_underlying_metadata(bdev, map->m_pblk + i);
2159 : : }
2160 : : return 0;
2161 : : }
2162 : :
2163 : : /*
2164 : : * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length
2165 : : * mpd->len and submit pages underlying it for IO
2166 : : *
2167 : : * @handle - handle for journal operations
2168 : : * @mpd - extent to map
2169 : : * @give_up_on_write - we set this to true iff there is a fatal error and there
2170 : : * is no hope of writing the data. The caller should discard
2171 : : * dirty pages to avoid infinite loops.
2172 : : *
2173 : : * The function maps extent starting at mpd->lblk of length mpd->len. If it is
2174 : : * delayed, blocks are allocated, if it is unwritten, we may need to convert
2175 : : * them to initialized or split the described range from larger unwritten
2176 : : * extent. Note that we need not map all the described range since allocation
2177 : : * can return less blocks or the range is covered by more unwritten extents. We
2178 : : * cannot map more because we are limited by reserved transaction credits. On
2179 : : * the other hand we always make sure that the last touched page is fully
2180 : : * mapped so that it can be written out (and thus forward progress is
2181 : : * guaranteed). After mapping we submit all mapped pages for IO.
2182 : : */
2183 : 0 : static int mpage_map_and_submit_extent(handle_t *handle,
2184 : : struct mpage_da_data *mpd,
2185 : : bool *give_up_on_write)
2186 : : {
2187 : 97725 : struct inode *inode = mpd->inode;
2188 : : struct ext4_map_blocks *map = &mpd->map;
2189 : : int err;
2190 : : loff_t disksize;
2191 : :
2192 : 97725 : mpd->io_submit.io_end->offset =
2193 : 97725 : ((loff_t)map->m_lblk) << inode->i_blkbits;
2194 : : do {
2195 : 97731 : err = mpage_map_one_extent(handle, mpd);
2196 [ - + ]: 97705 : if (err < 0) {
2197 : 0 : struct super_block *sb = inode->i_sb;
2198 : :
2199 [ # # ]: 0 : if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
2200 : : goto invalidate_dirty_pages;
2201 : : /*
2202 : : * Let the uper layers retry transient errors.
2203 : : * In the case of ENOSPC, if ext4_count_free_blocks()
2204 : : * is non-zero, a commit should free up blocks.
2205 : : */
2206 [ # # ][ # # ]: 0 : if ((err == -ENOMEM) ||
2207 [ # # ]: 0 : (err == -ENOSPC && ext4_count_free_clusters(sb)))
2208 : 0 : return err;
2209 : 0 : ext4_msg(sb, KERN_CRIT,
2210 : : "Delayed block allocation failed for "
2211 : : "inode %lu at logical offset %llu with"
2212 : : " max blocks %u with error %d",
2213 : : inode->i_ino,
2214 : : (unsigned long long)map->m_lblk,
2215 : : (unsigned)map->m_len, -err);
2216 : 0 : ext4_msg(sb, KERN_CRIT,
2217 : : "This should not happen!! Data will "
2218 : : "be lost\n");
2219 [ # # ]: 0 : if (err == -ENOSPC)
2220 : 0 : ext4_print_free_blocks(inode);
2221 : : invalidate_dirty_pages:
2222 : 0 : *give_up_on_write = true;
2223 : 0 : return err;
2224 : : }
2225 : : /*
2226 : : * Update buffer state, submit mapped pages, and get us new
2227 : : * extent to map
2228 : : */
2229 : 97705 : err = mpage_map_and_submit_buffers(mpd);
2230 [ + ]: 97723 : if (err < 0)
2231 : : return err;
2232 [ + + ]: 97726 : } while (map->m_len);
2233 : :
2234 : : /* Update on-disk size after IO is submitted */
2235 : 97720 : disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
2236 [ + + ]: 97720 : if (disksize > EXT4_I(inode)->i_disksize) {
2237 : : int err2;
2238 : :
2239 : : ext4_wb_update_i_disksize(inode, disksize);
2240 : 45127 : err2 = ext4_mark_inode_dirty(handle, inode);
2241 [ - + ]: 45127 : if (err2)
2242 : 0 : ext4_error(inode->i_sb,
2243 : : "Failed to mark inode %lu dirty",
2244 : : inode->i_ino);
2245 [ + ]: 45121 : if (!err)
2246 : : err = err2;
2247 : : }
2248 : 97715 : return err;
2249 : : }
2250 : :
2251 : : /*
2252 : : * Calculate the total number of credits to reserve for one writepages
2253 : : * iteration. This is called from ext4_writepages(). We map an extent of
2254 : : * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping
2255 : : * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN +
2256 : : * bpp - 1 blocks in bpp different extents.
2257 : : */
2258 : 0 : static int ext4_da_writepages_trans_blocks(struct inode *inode)
2259 : : {
2260 : : int bpp = ext4_journal_blocks_per_page(inode);
2261 : :
2262 : 154674 : return ext4_meta_trans_blocks(inode,
2263 : : MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp);
2264 : : }
2265 : :
2266 : : /*
2267 : : * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages
2268 : : * and underlying extent to map
2269 : : *
2270 : : * @mpd - where to look for pages
2271 : : *
2272 : : * Walk dirty pages in the mapping. If they are fully mapped, submit them for
2273 : : * IO immediately. When we find a page which isn't mapped we start accumulating
2274 : : * extent of buffers underlying these pages that needs mapping (formed by
2275 : : * either delayed or unwritten buffers). We also lock the pages containing
2276 : : * these buffers. The extent found is returned in @mpd structure (starting at
2277 : : * mpd->lblk with length mpd->len blocks).
2278 : : *
2279 : : * Note that this function can attach bios to one io_end structure which are
2280 : : * neither logically nor physically contiguous. Although it may seem as an
2281 : : * unnecessary complication, it is actually inevitable in blocksize < pagesize
2282 : : * case as we need to track IO to all buffers underlying a page in one io_end.
2283 : : */
2284 : 0 : static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
2285 : : {
2286 : 154707 : struct address_space *mapping = mpd->inode->i_mapping;
2287 : : struct pagevec pvec;
2288 : : unsigned int nr_pages;
2289 : 154707 : long left = mpd->wbc->nr_to_write;
2290 : 154707 : pgoff_t index = mpd->first_page;
2291 : 154707 : pgoff_t end = mpd->last_page;
2292 : : int tag;
2293 : : int i, err = 0;
2294 : 154707 : int blkbits = mpd->inode->i_blkbits;
2295 : : ext4_lblk_t lblk;
2296 : : struct buffer_head *head;
2297 : :
2298 [ + + ][ + + ]: 154707 : if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages)
2299 : : tag = PAGECACHE_TAG_TOWRITE;
2300 : : else
2301 : : tag = PAGECACHE_TAG_DIRTY;
2302 : :
2303 : : pagevec_init(&pvec, 0);
2304 : 154707 : mpd->map.m_len = 0;
2305 : 154707 : mpd->next_page = index;
2306 [ + + ]: 216931 : while (index <= end) {
2307 : 211587 : nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
2308 : 211587 : min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
2309 [ + + ]: 211625 : if (nr_pages == 0)
2310 : : goto out;
2311 : :
2312 [ + + ]: 1095588 : for (i = 0; i < nr_pages; i++) {
2313 : 1033333 : struct page *page = pvec.pages[i];
2314 : :
2315 : : /*
2316 : : * At this point, the page may be truncated or
2317 : : * invalidated (changing page->mapping to NULL), or
2318 : : * even swizzled back from swapper_space to tmpfs file
2319 : : * mapping. However, page->index will not change
2320 : : * because we have a reference on the page.
2321 : : */
2322 [ + + ]: 1033333 : if (page->index > end)
2323 : : goto out;
2324 : :
2325 : : /*
2326 : : * Accumulated enough dirty pages? This doesn't apply
2327 : : * to WB_SYNC_ALL mode. For integrity sync we have to
2328 : : * keep going because someone may be concurrently
2329 : : * dirtying pages, and we might have synced a lot of
2330 : : * newly appeared dirty pages, but have not synced all
2331 : : * of the old dirty pages.
2332 : : */
2333 [ + + ][ + + ]: 1033324 : if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0)
2334 : : goto out;
2335 : :
2336 : : /* If we can't merge this page, we are done. */
2337 [ + + ][ + + ]: 1032811 : if (mpd->map.m_len > 0 && mpd->next_page != page->index)
2338 : : goto out;
2339 : :
2340 : : lock_page(page);
2341 : : /*
2342 : : * If the page is no longer dirty, or its mapping no
2343 : : * longer corresponds to inode we are writing (which
2344 : : * means it has been truncated or invalidated), or the
2345 : : * page is already under writeback and we are not doing
2346 : : * a data integrity writeback, skip the page
2347 : : */
2348 [ + + ][ + + ]: 961969 : if (!PageDirty(page) ||
2349 [ + + ]: 5102 : (PageWriteback(page) &&
2350 [ - + ]: 932109 : (mpd->wbc->sync_mode == WB_SYNC_NONE)) ||
2351 : 932109 : unlikely(page->mapping != mapping)) {
2352 : 29860 : unlock_page(page);
2353 : 29847 : continue;
2354 : : }
2355 : :
2356 : : wait_on_page_writeback(page);
2357 [ - + ]: 932099 : BUG_ON(PageWriteback(page));
2358 : :
2359 [ + + ]: 932099 : if (mpd->map.m_len == 0)
2360 : 362500 : mpd->first_page = page->index;
2361 : 932099 : mpd->next_page = page->index + 1;
2362 : : /* Add all dirty buffers to mpd */
2363 : 1864198 : lblk = ((ext4_lblk_t)page->index) <<
2364 : 932099 : (PAGE_CACHE_SHIFT - blkbits);
2365 [ - + ]: 932099 : head = page_buffers(page);
2366 : 932099 : err = mpage_process_page_bufs(mpd, head, head, lblk);
2367 [ + + ]: 932079 : if (err <= 0)
2368 : : goto out;
2369 : : err = 0;
2370 : 888141 : left--;
2371 : : }
2372 : : pagevec_release(&pvec);
2373 : 62250 : cond_resched();
2374 : : }
2375 : : return 0;
2376 : : out:
2377 : : pagevec_release(&pvec);
2378 : 149371 : return err;
2379 : : }
2380 : :
2381 : 0 : static int __writepage(struct page *page, struct writeback_control *wbc,
2382 : : void *data)
2383 : : {
2384 : : struct address_space *mapping = data;
2385 : 5408 : int ret = ext4_writepage(page, wbc);
2386 : : mapping_set_error(mapping, ret);
2387 : 0 : return ret;
2388 : : }
2389 : :
2390 : 0 : static int ext4_writepages(struct address_space *mapping,
2391 : : struct writeback_control *wbc)
2392 : : {
2393 : : pgoff_t writeback_index = 0;
2394 : 115504 : long nr_to_write = wbc->nr_to_write;
2395 : : int range_whole = 0;
2396 : : int cycled = 1;
2397 : : handle_t *handle = NULL;
2398 : : struct mpage_da_data mpd;
2399 : 270183 : struct inode *inode = mapping->host;
2400 : : int needed_blocks, rsv_blocks = 0, ret = 0;
2401 : 115504 : struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2402 : : bool done;
2403 : : struct blk_plug plug;
2404 : 115504 : bool give_up_on_write = false;
2405 : :
2406 : : trace_ext4_writepages(inode, wbc);
2407 : :
2408 : : /*
2409 : : * No pages to write? This is mainly a kludge to avoid starting
2410 : : * a transaction for special inodes like journal inode on last iput()
2411 : : * because that could violate lock ordering on umount
2412 : : */
2413 [ + + ][ + + ]: 115473 : if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
2414 : : goto out_writepages;
2415 : :
2416 [ + + ]: 62272 : if (ext4_should_journal_data(inode)) {
2417 : : struct blk_plug plug;
2418 : :
2419 : 5408 : blk_start_plug(&plug);
2420 : 5408 : ret = write_cache_pages(mapping, wbc, __writepage, mapping);
2421 : 5408 : blk_finish_plug(&plug);
2422 : : goto out_writepages;
2423 : : }
2424 : :
2425 : : /*
2426 : : * If the filesystem has aborted, it is read-only, so return
2427 : : * right away instead of dumping stack traces later on that
2428 : : * will obscure the real source of the problem. We test
2429 : : * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because
2430 : : * the latter could be true if the filesystem is mounted
2431 : : * read-only, and in that case, ext4_writepages should
2432 : : * *never* be called, so if that ever happens, we would want
2433 : : * the stack trace.
2434 : : */
2435 [ + ]: 56864 : if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
2436 : : ret = -EROFS;
2437 : : goto out_writepages;
2438 : : }
2439 : :
2440 [ - + ]: 56886 : if (ext4_should_dioread_nolock(inode)) {
2441 : : /*
2442 : : * We may need to convert up to one extent per block in
2443 : : * the page and we may dirty the inode.
2444 : : */
2445 : 0 : rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits);
2446 : : }
2447 : :
2448 : : /*
2449 : : * If we have inline data and arrive here, it means that
2450 : : * we will soon create the block for the 1st page, so
2451 : : * we'd better clear the inline data here.
2452 : : */
2453 [ - + ]: 56886 : if (ext4_has_inline_data(inode)) {
2454 : : /* Just inode will be modified... */
2455 : : handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
2456 [ # # ]: 0 : if (IS_ERR(handle)) {
2457 : : ret = PTR_ERR(handle);
2458 : 0 : goto out_writepages;
2459 : : }
2460 [ # # ]: 0 : BUG_ON(ext4_test_inode_state(inode,
2461 : : EXT4_STATE_MAY_INLINE_DATA));
2462 : 0 : ext4_destroy_inline_data(handle, inode);
2463 : 0 : ext4_journal_stop(handle);
2464 : : }
2465 : :
2466 [ + + ][ + + ]: 172385 : if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2467 : : range_whole = 1;
2468 : :
2469 [ + + ]: 56881 : if (wbc->range_cyclic) {
2470 : 12239 : writeback_index = mapping->writeback_index;
2471 [ + + ]: 12239 : if (writeback_index)
2472 : : cycled = 0;
2473 : 12239 : mpd.first_page = writeback_index;
2474 : 12239 : mpd.last_page = -1;
2475 : : } else {
2476 : 44642 : mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT;
2477 : 44642 : mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT;
2478 : : }
2479 : :
2480 : 56881 : mpd.inode = inode;
2481 : 56881 : mpd.wbc = wbc;
2482 : 56881 : ext4_io_submit_init(&mpd.io_submit, wbc);
2483 : : retry:
2484 [ + + ][ + + ]: 57245 : if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2485 : 35118 : tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page);
2486 : : done = false;
2487 : 57254 : blk_start_plug(&plug);
2488 [ + + ][ + + ]: 211910 : while (!done && mpd.first_page <= mpd.last_page) {
2489 : : /* For each extent of pages we use new io_end */
2490 : 154697 : mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
2491 [ + + ]: 154711 : if (!mpd.io_submit.io_end) {
2492 : : ret = -ENOMEM;
2493 : : break;
2494 : : }
2495 : :
2496 : : /*
2497 : : * We have two constraints: We find one extent to map and we
2498 : : * must always write out whole page (makes a difference when
2499 : : * blocksize < pagesize) so that we don't block on IO when we
2500 : : * try to write out the rest of the page. Journalled mode is
2501 : : * not supported by delalloc.
2502 : : */
2503 [ - + ]: 154670 : BUG_ON(ext4_should_journal_data(inode));
2504 : 154670 : needed_blocks = ext4_da_writepages_trans_blocks(inode);
2505 : :
2506 : : /* start a new transaction */
2507 : : handle = ext4_journal_start_with_reserve(inode,
2508 : : EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks);
2509 [ - + ]: 154702 : if (IS_ERR(handle)) {
2510 : : ret = PTR_ERR(handle);
2511 : 0 : ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
2512 : : "%ld pages, ino %lu; err %d", __func__,
2513 : : wbc->nr_to_write, inode->i_ino, ret);
2514 : : /* Release allocated io_end */
2515 : 0 : ext4_put_io_end(mpd.io_submit.io_end);
2516 : 0 : break;
2517 : : }
2518 : :
2519 : 154702 : trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc);
2520 : 154702 : ret = mpage_prepare_extent_to_map(&mpd);
2521 [ + + ]: 154715 : if (!ret) {
2522 [ + + ]: 154714 : if (mpd.map.m_len)
2523 : 97725 : ret = mpage_map_and_submit_extent(handle, &mpd,
2524 : : &give_up_on_write);
2525 : : else {
2526 : : /*
2527 : : * We scanned the whole range (or exhausted
2528 : : * nr_to_write), submitted what was mapped and
2529 : : * didn't find anything needing mapping. We are
2530 : : * done.
2531 : : */
2532 : : done = true;
2533 : : }
2534 : : }
2535 : 154685 : ext4_journal_stop(handle);
2536 : : /* Submit prepared bio */
2537 : 154717 : ext4_io_submit(&mpd.io_submit);
2538 : : /* Unlock pages we didn't use */
2539 : 154663 : mpage_release_unused_pages(&mpd, give_up_on_write);
2540 : : /* Drop our io_end reference we got from init */
2541 : 154653 : ext4_put_io_end(mpd.io_submit.io_end);
2542 : :
2543 [ - + ][ # # ]: 154707 : if (ret == -ENOSPC && sbi->s_journal) {
2544 : : /*
2545 : : * Commit the transaction which would
2546 : : * free blocks released in the transaction
2547 : : * and try again
2548 : : */
2549 : 0 : jbd2_journal_force_commit_nested(sbi->s_journal);
2550 : : ret = 0;
2551 : 0 : continue;
2552 : : }
2553 : : /* Fatal error - ENOMEM, EIO... */
2554 [ + + ]: 154707 : if (ret)
2555 : : break;
2556 : : }
2557 : 57298 : blk_finish_plug(&plug);
2558 [ + + ][ + + ]: 57252 : if (!ret && !cycled && wbc->nr_to_write > 0) {
2559 : : cycled = 1;
2560 : 362 : mpd.last_page = writeback_index - 1;
2561 : 362 : mpd.first_page = 0;
2562 : 362 : goto retry;
2563 : : }
2564 : :
2565 : : /* Update index */
2566 [ + + ][ + + ]: 56890 : if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
[ + ]
2567 : : /*
2568 : : * Set the writeback_index so that range_cyclic
2569 : : * mode will write it back later
2570 : : */
2571 : 41157 : mapping->writeback_index = mpd.first_page;
2572 : :
2573 : : out_writepages:
2574 : : trace_ext4_writepages_result(inode, wbc, ret,
2575 : 115477 : nr_to_write - wbc->nr_to_write);
2576 : 115477 : return ret;
2577 : : }
2578 : :
2579 : 0 : static int ext4_nonda_switch(struct super_block *sb)
2580 : : {
2581 : : s64 free_clusters, dirty_clusters;
2582 : : struct ext4_sb_info *sbi = EXT4_SB(sb);
2583 : :
2584 : : /*
2585 : : * switch to non delalloc mode if we are running low
2586 : : * on free block. The free block accounting via percpu
2587 : : * counters can get slightly wrong with percpu_counter_batch getting
2588 : : * accumulated on each CPU without updating global counters
2589 : : * Delalloc need an accurate free block accounting. So switch
2590 : : * to non delalloc when we are near to error range.
2591 : : */
2592 : : free_clusters =
2593 : 7176459 : percpu_counter_read_positive(&sbi->s_freeclusters_counter);
2594 : : dirty_clusters =
2595 : 7176875 : percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
2596 : : /*
2597 : : * Start pushing delalloc when 1/2 of free blocks are dirty.
2598 : : */
2599 [ + + ][ - + ]: 7176724 : if (dirty_clusters && (free_clusters < 2 * dirty_clusters))
2600 : 0 : try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
2601 : :
2602 [ + - ][ + ]: 7176703 : if (2 * free_clusters < 3 * dirty_clusters ||
2603 : 7176703 : free_clusters < (dirty_clusters + EXT4_FREECLUSTERS_WATERMARK)) {
2604 : : /*
2605 : : * free block count is less than 150% of dirty blocks
2606 : : * or free blocks is less than watermark
2607 : : */
2608 : : return 1;
2609 : : }
2610 : 7176877 : return 0;
2611 : : }
2612 : :
2613 : 0 : static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2614 : : loff_t pos, unsigned len, unsigned flags,
2615 : : struct page **pagep, void **fsdata)
2616 : : {
2617 : 6912479 : int ret, retries = 0;
2618 : : struct page *page;
2619 : : pgoff_t index;
2620 : 13824915 : struct inode *inode = mapping->host;
2621 : : handle_t *handle;
2622 : :
2623 : 6912479 : index = pos >> PAGE_CACHE_SHIFT;
2624 : :
2625 [ - + ]: 6912479 : if (ext4_nonda_switch(inode->i_sb)) {
2626 : 0 : *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
2627 : 0 : return ext4_write_begin(file, mapping, pos,
2628 : : len, flags, pagep, fsdata);
2629 : : }
2630 : 6912461 : *fsdata = (void *)0;
2631 : : trace_ext4_da_write_begin(inode, pos, len, flags);
2632 : :
2633 [ + - ]: 6912201 : if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
2634 : 0 : ret = ext4_da_write_inline_data_begin(mapping, inode,
2635 : : pos, len, flags,
2636 : : pagep, fsdata);
2637 [ # # ]: 0 : if (ret < 0)
2638 : : return ret;
2639 [ # # ]: 0 : if (ret == 1)
2640 : : return 0;
2641 : : }
2642 : :
2643 : : /*
2644 : : * grab_cache_page_write_begin() can take a long time if the
2645 : : * system is thrashing due to memory pressure, or if the page
2646 : : * is being written back. So grab it first before we start
2647 : : * the transaction handle. This also allows us to allocate
2648 : : * the page (if needed) without using GFP_NOFS.
2649 : : */
2650 : : retry_grab:
2651 : 6912201 : page = grab_cache_page_write_begin(mapping, index, flags);
2652 [ + + ]: 6912497 : if (!page)
2653 : : return -ENOMEM;
2654 : 6912318 : unlock_page(page);
2655 : :
2656 : : /*
2657 : : * With delayed allocation, we don't log the i_disksize update
2658 : : * if there is delayed block allocation. But we still need
2659 : : * to journalling the i_disksize update if writes to the end
2660 : : * of file which has an already mapped buffer.
2661 : : */
2662 : : retry_journal:
2663 : : handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
2664 [ - + ]: 6912487 : if (IS_ERR(handle)) {
2665 : 0 : page_cache_release(page);
2666 : 0 : return PTR_ERR(handle);
2667 : : }
2668 : :
2669 : : lock_page(page);
2670 [ - + ]: 6912361 : if (page->mapping != mapping) {
2671 : : /* The page got truncated from under us */
2672 : 0 : unlock_page(page);
2673 : 0 : page_cache_release(page);
2674 : 0 : ext4_journal_stop(handle);
2675 : 0 : goto retry_grab;
2676 : : }
2677 : : /* In case writeback began while the page was unlocked */
2678 : 6912361 : wait_for_stable_page(page);
2679 : :
2680 : 6912485 : ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
2681 [ - + ]: 13824940 : if (ret < 0) {
2682 : 0 : unlock_page(page);
2683 : 0 : ext4_journal_stop(handle);
2684 : : /*
2685 : : * block_write_begin may have instantiated a few blocks
2686 : : * outside i_size. Trim these off again. Don't need
2687 : : * i_size_read because we hold i_mutex.
2688 : : */
2689 [ # # ]: 0 : if (pos + len > inode->i_size)
2690 : : ext4_truncate_failed_write(inode);
2691 : :
2692 [ # # # # ]: 0 : if (ret == -ENOSPC &&
2693 : 0 : ext4_should_retry_alloc(inode->i_sb, &retries))
2694 : : goto retry_journal;
2695 : :
2696 : 0 : page_cache_release(page);
2697 : 0 : return ret;
2698 : : }
2699 : :
2700 : 6912461 : *pagep = page;
2701 : 6912461 : return ret;
2702 : : }
2703 : :
2704 : : /*
2705 : : * Check if we should update i_disksize
2706 : : * when write to the end of file but not require block allocation
2707 : : */
2708 : 0 : static int ext4_da_should_update_i_disksize(struct page *page,
2709 : : unsigned long offset)
2710 : : {
2711 : : struct buffer_head *bh;
2712 : 5155480 : struct inode *inode = page->mapping->host;
2713 : : unsigned int idx;
2714 : : int i;
2715 : :
2716 [ - + ]: 5155480 : bh = page_buffers(page);
2717 : 5155480 : idx = offset >> inode->i_blkbits;
2718 : :
2719 [ - + ]: 5155480 : for (i = 0; i < idx; i++)
2720 : 0 : bh = bh->b_this_page;
2721 : :
2722 [ + + ][ + + ]: 5155480 : if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
[ + + ]
2723 : : return 0;
2724 : : return 1;
2725 : : }
2726 : :
2727 : 0 : static int ext4_da_write_end(struct file *file,
2728 : : struct address_space *mapping,
2729 : : loff_t pos, unsigned len, unsigned copied,
2730 : : struct page *page, void *fsdata)
2731 : : {
2732 : 6912464 : struct inode *inode = mapping->host;
2733 : : int ret = 0, ret2;
2734 : : handle_t *handle = ext4_journal_current_handle();
2735 : : loff_t new_i_size;
2736 : : unsigned long start, end;
2737 : 6912464 : int write_mode = (int)(unsigned long)fsdata;
2738 : :
2739 [ - + ]: 6912464 : if (write_mode == FALL_BACK_TO_NONDELALLOC)
2740 : 0 : return ext4_write_end(file, mapping, pos,
2741 : : len, copied, page, fsdata);
2742 : :
2743 : : trace_ext4_da_write_end(inode, pos, len, copied);
2744 : 6912487 : start = pos & (PAGE_CACHE_SIZE - 1);
2745 : 6912487 : end = start + copied - 1;
2746 : :
2747 : : /*
2748 : : * generic_write_end() will run mark_inode_dirty() if i_size
2749 : : * changes. So let's piggyback the i_disksize mark_inode_dirty
2750 : : * into that.
2751 : : */
2752 : 6912487 : new_i_size = pos + copied;
2753 [ + + ][ + + ]: 6912487 : if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
2754 [ + + + ]: 10310952 : if (ext4_has_inline_data(inode) ||
2755 : 5155475 : ext4_da_should_update_i_disksize(page, end)) {
2756 : 2147 : down_write(&EXT4_I(inode)->i_data_sem);
2757 [ + - ]: 2155 : if (new_i_size > EXT4_I(inode)->i_disksize)
2758 : 2155 : EXT4_I(inode)->i_disksize = new_i_size;
2759 : 2155 : up_write(&EXT4_I(inode)->i_data_sem);
2760 : : /* We need to mark inode dirty even if
2761 : : * new_i_size is less that inode->i_size
2762 : : * bu greater than i_disksize.(hint delalloc)
2763 : : */
2764 : 2155 : ext4_mark_inode_dirty(handle, inode);
2765 : : }
2766 : : }
2767 : :
2768 [ + - ][ - + ]: 6912488 : if (write_mode != CONVERT_INLINE_DATA &&
2769 [ # # ]: 0 : ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) &&
2770 : 0 : ext4_has_inline_data(inode))
2771 : 0 : ret2 = ext4_da_write_inline_data_end(inode, pos, len, copied,
2772 : : page);
2773 : : else
2774 : 6912488 : ret2 = generic_write_end(file, mapping, pos, len, copied,
2775 : : page, fsdata);
2776 : :
2777 : : copied = ret2;
2778 [ - + ]: 6912473 : if (ret2 < 0)
2779 : : ret = ret2;
2780 : 6912473 : ret2 = ext4_journal_stop(handle);
2781 [ + + ]: 13824994 : if (!ret)
2782 : : ret = ret2;
2783 : :
2784 [ - + ]: 6912530 : return ret ? ret : copied;
2785 : : }
2786 : :
2787 : 0 : static void ext4_da_invalidatepage(struct page *page, unsigned int offset,
2788 : : unsigned int length)
2789 : : {
2790 : : /*
2791 : : * Drop reserved blocks
2792 : : */
2793 [ - + ]: 1713937 : BUG_ON(!PageLocked(page));
2794 [ + + ]: 1713937 : if (!page_has_buffers(page))
2795 : : goto out;
2796 : :
2797 : 1713933 : ext4_da_page_release_reservation(page, offset, length);
2798 : :
2799 : : out:
2800 : 1713930 : ext4_invalidatepage(page, offset, length);
2801 : :
2802 : 1713944 : return;
2803 : : }
2804 : :
2805 : : /*
2806 : : * Force all delayed allocation blocks to be allocated for a given inode.
2807 : : */
2808 : 0 : int ext4_alloc_da_blocks(struct inode *inode)
2809 : : {
2810 : : trace_ext4_alloc_da_blocks(inode);
2811 : :
2812 [ + + ][ - + ]: 272788 : if (!EXT4_I(inode)->i_reserved_data_blocks &&
2813 : 267478 : !EXT4_I(inode)->i_reserved_meta_blocks)
2814 : : return 0;
2815 : :
2816 : : /*
2817 : : * We do something simple for now. The filemap_flush() will
2818 : : * also start triggering a write of the data blocks, which is
2819 : : * not strictly speaking necessary (and for users of
2820 : : * laptop_mode, not even desirable). However, to do otherwise
2821 : : * would require replicating code paths in:
2822 : : *
2823 : : * ext4_writepages() ->
2824 : : * write_cache_pages() ---> (via passed in callback function)
2825 : : * __mpage_da_writepage() -->
2826 : : * mpage_add_bh_to_extent()
2827 : : * mpage_da_map_blocks()
2828 : : *
2829 : : * The problem is that write_cache_pages(), located in
2830 : : * mm/page-writeback.c, marks pages clean in preparation for
2831 : : * doing I/O, which is not desirable if we're not planning on
2832 : : * doing I/O at all.
2833 : : *
2834 : : * We could call write_cache_pages(), and then redirty all of
2835 : : * the pages by calling redirty_page_for_writepage() but that
2836 : : * would be ugly in the extreme. So instead we would need to
2837 : : * replicate parts of the code in the above functions,
2838 : : * simplifying them because we wouldn't actually intend to
2839 : : * write out the pages, but rather only collect contiguous
2840 : : * logical block extents, call the multi-block allocator, and
2841 : : * then update the buffer heads with the block allocations.
2842 : : *
2843 : : * For now, though, we'll cheat by calling filemap_flush(),
2844 : : * which will map the blocks, and start the I/O, but not
2845 : : * actually wait for the I/O to complete.
2846 : : */
2847 : 5310 : return filemap_flush(inode->i_mapping);
2848 : : }
2849 : :
2850 : : /*
2851 : : * bmap() is special. It gets used by applications such as lilo and by
2852 : : * the swapper to find the on-disk block of a specific piece of data.
2853 : : *
2854 : : * Naturally, this is dangerous if the block concerned is still in the
2855 : : * journal. If somebody makes a swapfile on an ext4 data-journaling
2856 : : * filesystem and enables swap, then they may get a nasty shock when the
2857 : : * data getting swapped to that swapfile suddenly gets overwritten by
2858 : : * the original zero's written out previously to the journal and
2859 : : * awaiting writeback in the kernel's buffer cache.
2860 : : *
2861 : : * So, if we see any bmap calls here on a modified, data-journaled file,
2862 : : * take extra steps to flush any blocks which might be in the cache.
2863 : : */
2864 : 0 : static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
2865 : : {
2866 : 145402 : struct inode *inode = mapping->host;
2867 : : journal_t *journal;
2868 : : int err;
2869 : :
2870 : : /*
2871 : : * We can get here for an inline file via the FIBMAP ioctl
2872 : : */
2873 [ + - ]: 145402 : if (ext4_has_inline_data(inode))
2874 : : return 0;
2875 : :
2876 [ - + ][ # # ]: 145402 : if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
2877 : 145402 : test_opt(inode->i_sb, DELALLOC)) {
2878 : : /*
2879 : : * With delalloc we want to sync the file
2880 : : * so that we can make sure we allocate
2881 : : * blocks for file
2882 : : */
2883 : 0 : filemap_write_and_wait(mapping);
2884 : : }
2885 : :
2886 [ + - ][ - + ]: 145402 : if (EXT4_JOURNAL(inode) &&
2887 : : ext4_test_inode_state(inode, EXT4_STATE_JDATA)) {
2888 : : /*
2889 : : * This is a REALLY heavyweight approach, but the use of
2890 : : * bmap on dirty files is expected to be extremely rare:
2891 : : * only if we run lilo or swapon on a freshly made file
2892 : : * do we expect this to happen.
2893 : : *
2894 : : * (bmap requires CAP_SYS_RAWIO so this does not
2895 : : * represent an unprivileged user DOS attack --- we'd be
2896 : : * in trouble if mortal users could trigger this path at
2897 : : * will.)
2898 : : *
2899 : : * NB. EXT4_STATE_JDATA is not set on files other than
2900 : : * regular files. If somebody wants to bmap a directory
2901 : : * or symlink and gets confused because the buffer
2902 : : * hasn't yet been flushed to disk, they deserve
2903 : : * everything they get.
2904 : : */
2905 : :
2906 : : ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
2907 : 0 : journal = EXT4_JOURNAL(inode);
2908 : 0 : jbd2_journal_lock_updates(journal);
2909 : 0 : err = jbd2_journal_flush(journal);
2910 : 0 : jbd2_journal_unlock_updates(journal);
2911 : :
2912 [ # # ]: 0 : if (err)
2913 : : return 0;
2914 : : }
2915 : :
2916 : 145402 : return generic_block_bmap(mapping, block, ext4_get_block);
2917 : : }
2918 : :
2919 : 0 : static int ext4_readpage(struct file *file, struct page *page)
2920 : : {
2921 : : int ret = -EAGAIN;
2922 : 29337 : struct inode *inode = page->mapping->host;
2923 : :
2924 : : trace_ext4_readpage(page);
2925 : :
2926 [ - + ]: 29337 : if (ext4_has_inline_data(inode))
2927 : 0 : ret = ext4_readpage_inline(inode, page);
2928 : :
2929 [ + - ]: 29337 : if (ret == -EAGAIN)
2930 : 29337 : return mpage_readpage(page, ext4_get_block);
2931 : :
2932 : : return ret;
2933 : : }
2934 : :
2935 : : static int
2936 : 0 : ext4_readpages(struct file *file, struct address_space *mapping,
2937 : : struct list_head *pages, unsigned nr_pages)
2938 : : {
2939 : 120038 : struct inode *inode = mapping->host;
2940 : :
2941 : : /* If the file has inline data, no need to do readpages. */
2942 [ + + ]: 120038 : if (ext4_has_inline_data(inode))
2943 : : return 0;
2944 : :
2945 : 119954 : return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
2946 : : }
2947 : :
2948 : 0 : static void ext4_invalidatepage(struct page *page, unsigned int offset,
2949 : : unsigned int length)
2950 : : {
2951 : : trace_ext4_invalidatepage(page, offset, length);
2952 : :
2953 : : /* No journalling happens on data buffers when this function is used */
2954 [ + ][ - + ]: 3427877 : WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));
[ + + ][ - + ]
2955 : :
2956 : 1713941 : block_invalidatepage(page, offset, length);
2957 : 1713950 : }
2958 : :
2959 : 0 : static int __ext4_journalled_invalidatepage(struct page *page,
2960 : : unsigned int offset,
2961 : : unsigned int length)
2962 : : {
2963 : 7223 : journal_t *journal = EXT4_JOURNAL(page->mapping->host);
2964 : :
2965 : : trace_ext4_journalled_invalidatepage(page, offset, length);
2966 : :
2967 : : /*
2968 : : * If it's a full truncate we just forget about the pending dirtying
2969 : : */
2970 [ + - ]: 7223 : if (offset == 0 && length == PAGE_CACHE_SIZE)
2971 : : ClearPageChecked(page);
2972 : :
2973 : 7223 : return jbd2_journal_invalidatepage(journal, page, offset, length);
2974 : : }
2975 : :
2976 : : /* Wrapper for aops... */
2977 : 0 : static void ext4_journalled_invalidatepage(struct page *page,
2978 : : unsigned int offset,
2979 : : unsigned int length)
2980 : : {
2981 [ - + ]: 7223 : WARN_ON(__ext4_journalled_invalidatepage(page, offset, length) < 0);
2982 : 0 : }
2983 : :
2984 : 0 : static int ext4_releasepage(struct page *page, gfp_t wait)
2985 : : {
2986 : 1846970 : journal_t *journal = EXT4_JOURNAL(page->mapping->host);
2987 : :
2988 : : trace_ext4_releasepage(page);
2989 : :
2990 : : /* Page has dirty journalled data -> cannot release */
2991 [ + - ]: 3693944 : if (PageChecked(page))
2992 : : return 0;
2993 [ + - ]: 1846974 : if (journal)
2994 : 1846974 : return jbd2_journal_try_to_free_buffers(journal, page, wait);
2995 : : else
2996 : 0 : return try_to_free_buffers(page);
2997 : : }
2998 : :
2999 : : /*
3000 : : * ext4_get_block used when preparing for a DIO write or buffer write.
3001 : : * We allocate an uinitialized extent if blocks haven't been allocated.
3002 : : * The extent will be converted to initialized after the IO is complete.
3003 : : */
3004 : 0 : int ext4_get_block_write(struct inode *inode, sector_t iblock,
3005 : : struct buffer_head *bh_result, int create)
3006 : : {
3007 : : ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
3008 : : inode->i_ino, create);
3009 : 144772 : return _ext4_get_block(inode, iblock, bh_result,
3010 : : EXT4_GET_BLOCKS_IO_CREATE_EXT);
3011 : : }
3012 : :
3013 : 0 : static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
3014 : : struct buffer_head *bh_result, int create)
3015 : : {
3016 : : ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n",
3017 : : inode->i_ino, create);
3018 : 0 : return _ext4_get_block(inode, iblock, bh_result,
3019 : : EXT4_GET_BLOCKS_NO_LOCK);
3020 : : }
3021 : :
3022 : 0 : static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3023 : : ssize_t size, void *private)
3024 : : {
3025 : 29442 : ext4_io_end_t *io_end = iocb->private;
3026 : :
3027 : : /* if not async direct IO just return */
3028 [ - + ]: 29442 : if (!io_end)
3029 : 0 : return;
3030 : :
3031 : : ext_debug("ext4_end_io_dio(): io_end 0x%p "
3032 : : "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
3033 : : iocb->private, io_end->inode->i_ino, iocb, offset,
3034 : : size);
3035 : :
3036 : 0 : iocb->private = NULL;
3037 : 0 : io_end->offset = offset;
3038 : 0 : io_end->size = size;
3039 : 0 : ext4_put_io_end(io_end);
3040 : : }
3041 : :
3042 : : /*
3043 : : * For ext4 extent files, ext4 will do direct-io write to holes,
3044 : : * preallocated extents, and those write extend the file, no need to
3045 : : * fall back to buffered IO.
3046 : : *
3047 : : * For holes, we fallocate those blocks, mark them as uninitialized
3048 : : * If those blocks were preallocated, we mark sure they are split, but
3049 : : * still keep the range to write as uninitialized.
3050 : : *
3051 : : * The unwritten extents will be converted to written when DIO is completed.
3052 : : * For async direct IO, since the IO may still pending when return, we
3053 : : * set up an end_io call back function, which will do the conversion
3054 : : * when async direct IO completed.
3055 : : *
3056 : : * If the O_DIRECT write will extend the file then add this inode to the
3057 : : * orphan list. So recovery will truncate it back to the original size
3058 : : * if the machine crashes during the write.
3059 : : *
3060 : : */
3061 : 0 : static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3062 : : const struct iovec *iov, loff_t offset,
3063 : : unsigned long nr_segs)
3064 : : {
3065 : 130644 : struct file *file = iocb->ki_filp;
3066 : 130644 : struct inode *inode = file->f_mapping->host;
3067 : : ssize_t ret;
3068 : : size_t count = iov_length(iov, nr_segs);
3069 : : int overwrite = 0;
3070 : : get_block_t *get_block_func = NULL;
3071 : : int dio_flags = 0;
3072 : 130644 : loff_t final_size = offset + count;
3073 : : ext4_io_end_t *io_end = NULL;
3074 : :
3075 : : /* Use the old path for reads and writes beyond i_size. */
3076 [ + + ][ + + ]: 130644 : if (rw != WRITE || final_size > inode->i_size)
3077 : 101196 : return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
3078 : :
3079 [ - + ]: 29448 : BUG_ON(iocb->private == NULL);
3080 : :
3081 : : /*
3082 : : * Make all waiters for direct IO properly wait also for extent
3083 : : * conversion. This also disallows race between truncate() and
3084 : : * overwrite DIO as i_dio_count needs to be incremented under i_mutex.
3085 : : */
3086 [ + - ]: 29448 : if (rw == WRITE)
3087 : 29448 : atomic_inc(&inode->i_dio_count);
3088 : :
3089 : : /* If we do a overwrite dio, i_mutex locking can be released */
3090 : 29448 : overwrite = *((int *)iocb->private);
3091 : :
3092 [ - + ]: 160092 : if (overwrite) {
3093 : 0 : down_read(&EXT4_I(inode)->i_data_sem);
3094 : 0 : mutex_unlock(&inode->i_mutex);
3095 : : }
3096 : :
3097 : : /*
3098 : : * We could direct write to holes and fallocate.
3099 : : *
3100 : : * Allocated blocks to fill the hole are marked as
3101 : : * uninitialized to prevent parallel buffered read to expose
3102 : : * the stale data before DIO complete the data IO.
3103 : : *
3104 : : * As to previously fallocated extents, ext4 get_block will
3105 : : * just simply mark the buffer mapped but still keep the
3106 : : * extents uninitialized.
3107 : : *
3108 : : * For non AIO case, we will convert those unwritten extents
3109 : : * to written after return back from blockdev_direct_IO.
3110 : : *
3111 : : * For async DIO, the conversion needs to be deferred when the
3112 : : * IO is completed. The ext4 end_io callback function will be
3113 : : * called to take care of the conversion work. Here for async
3114 : : * case, we allocate an io_end structure to hook to the iocb.
3115 : : */
3116 : 29448 : iocb->private = NULL;
3117 : : ext4_inode_aio_set(inode, NULL);
3118 [ - + ]: 29448 : if (!is_sync_kiocb(iocb)) {
3119 : 0 : io_end = ext4_init_io_end(inode, GFP_NOFS);
3120 [ # # ]: 0 : if (!io_end) {
3121 : : ret = -ENOMEM;
3122 : : goto retake_lock;
3123 : : }
3124 : : /*
3125 : : * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
3126 : : */
3127 : 0 : iocb->private = ext4_get_io_end(io_end);
3128 : : /*
3129 : : * we save the io structure for current async direct
3130 : : * IO, so that later ext4_map_blocks() could flag the
3131 : : * io structure whether there is a unwritten extents
3132 : : * needs to be converted when IO is completed.
3133 : : */
3134 : : ext4_inode_aio_set(inode, io_end);
3135 : : }
3136 : :
3137 [ + - ]: 29448 : if (overwrite) {
3138 : : get_block_func = ext4_get_block_write_nolock;
3139 : : } else {
3140 : : get_block_func = ext4_get_block_write;
3141 : : dio_flags = DIO_LOCKING;
3142 : : }
3143 : 29448 : ret = __blockdev_direct_IO(rw, iocb, inode,
3144 : 29448 : inode->i_sb->s_bdev, iov,
3145 : : offset, nr_segs,
3146 : : get_block_func,
3147 : : ext4_end_io_dio,
3148 : : NULL,
3149 : : dio_flags);
3150 : :
3151 : : /*
3152 : : * Put our reference to io_end. This can free the io_end structure e.g.
3153 : : * in sync IO case or in case of error. It can even perform extent
3154 : : * conversion if all bios we submitted finished before we got here.
3155 : : * Note that in that case iocb->private can be already set to NULL
3156 : : * here.
3157 : : */
3158 [ - + ]: 29448 : if (io_end) {
3159 : : ext4_inode_aio_set(inode, NULL);
3160 : 0 : ext4_put_io_end(io_end);
3161 : : /*
3162 : : * When no IO was submitted ext4_end_io_dio() was not
3163 : : * called so we have to put iocb's reference.
3164 : : */
3165 [ # # ][ # # ]: 0 : if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) {
3166 [ # # ]: 0 : WARN_ON(iocb->private != io_end);
3167 [ # # ]: 0 : WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
3168 : 0 : ext4_put_io_end(io_end);
3169 : 0 : iocb->private = NULL;
3170 : : }
3171 : : }
3172 [ + + ][ - + ]: 29448 : if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
3173 : : EXT4_STATE_DIO_UNWRITTEN)) {
3174 : : int err;
3175 : : /*
3176 : : * for non AIO case, since the IO is already
3177 : : * completed, we could do the conversion right here
3178 : : */
3179 : 0 : err = ext4_convert_unwritten_extents(NULL, inode,
3180 : : offset, ret);
3181 [ # # ]: 0 : if (err < 0)
3182 : : ret = err;
3183 : : ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3184 : : }
3185 : :
3186 : : retake_lock:
3187 [ + - ]: 29448 : if (rw == WRITE)
3188 : 29448 : inode_dio_done(inode);
3189 : : /* take i_mutex locking again if we do a ovewrite dio */
3190 [ - + ]: 29448 : if (overwrite) {
3191 : 0 : up_read(&EXT4_I(inode)->i_data_sem);
3192 : 0 : mutex_lock(&inode->i_mutex);
3193 : : }
3194 : :
3195 : 29448 : return ret;
3196 : : }
3197 : :
3198 : 0 : static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
3199 : : const struct iovec *iov, loff_t offset,
3200 : : unsigned long nr_segs)
3201 : : {
3202 : 130644 : struct file *file = iocb->ki_filp;
3203 : 130644 : struct inode *inode = file->f_mapping->host;
3204 : : ssize_t ret;
3205 : :
3206 : : /*
3207 : : * If we are doing data journalling we don't support O_DIRECT
3208 : : */
3209 [ + - ]: 130644 : if (ext4_should_journal_data(inode))
3210 : : return 0;
3211 : :
3212 : : /* Let buffer I/O handle the inline data case. */
3213 [ + - ]: 130644 : if (ext4_has_inline_data(inode))
3214 : : return 0;
3215 : :
3216 : : trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
3217 [ + - ]: 130644 : if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3218 : 130644 : ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
3219 : : else
3220 : 130644 : ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
3221 : : trace_ext4_direct_IO_exit(inode, offset,
3222 : : iov_length(iov, nr_segs), rw, ret);
3223 : 130644 : return ret;
3224 : : }
3225 : :
3226 : : /*
3227 : : * Pages can be marked dirty completely asynchronously from ext4's journalling
3228 : : * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do
3229 : : * much here because ->set_page_dirty is called under VFS locks. The page is
3230 : : * not necessarily locked.
3231 : : *
3232 : : * We cannot just dirty the page and leave attached buffers clean, because the
3233 : : * buffers' dirty state is "definitive". We cannot just set the buffers dirty
3234 : : * or jbddirty because all the journalling code will explode.
3235 : : *
3236 : : * So what we do is to mark the page "pending dirty" and next time writepage
3237 : : * is called, propagate that into the buffers appropriately.
3238 : : */
3239 : 0 : static int ext4_journalled_set_page_dirty(struct page *page)
3240 : : {
3241 : : SetPageChecked(page);
3242 : 0 : return __set_page_dirty_nobuffers(page);
3243 : : }
3244 : :
3245 : : static const struct address_space_operations ext4_aops = {
3246 : : .readpage = ext4_readpage,
3247 : : .readpages = ext4_readpages,
3248 : : .writepage = ext4_writepage,
3249 : : .writepages = ext4_writepages,
3250 : : .write_begin = ext4_write_begin,
3251 : : .write_end = ext4_write_end,
3252 : : .bmap = ext4_bmap,
3253 : : .invalidatepage = ext4_invalidatepage,
3254 : : .releasepage = ext4_releasepage,
3255 : : .direct_IO = ext4_direct_IO,
3256 : : .migratepage = buffer_migrate_page,
3257 : : .is_partially_uptodate = block_is_partially_uptodate,
3258 : : .error_remove_page = generic_error_remove_page,
3259 : : };
3260 : :
3261 : : static const struct address_space_operations ext4_journalled_aops = {
3262 : : .readpage = ext4_readpage,
3263 : : .readpages = ext4_readpages,
3264 : : .writepage = ext4_writepage,
3265 : : .writepages = ext4_writepages,
3266 : : .write_begin = ext4_write_begin,
3267 : : .write_end = ext4_journalled_write_end,
3268 : : .set_page_dirty = ext4_journalled_set_page_dirty,
3269 : : .bmap = ext4_bmap,
3270 : : .invalidatepage = ext4_journalled_invalidatepage,
3271 : : .releasepage = ext4_releasepage,
3272 : : .direct_IO = ext4_direct_IO,
3273 : : .is_partially_uptodate = block_is_partially_uptodate,
3274 : : .error_remove_page = generic_error_remove_page,
3275 : : };
3276 : :
3277 : : static const struct address_space_operations ext4_da_aops = {
3278 : : .readpage = ext4_readpage,
3279 : : .readpages = ext4_readpages,
3280 : : .writepage = ext4_writepage,
3281 : : .writepages = ext4_writepages,
3282 : : .write_begin = ext4_da_write_begin,
3283 : : .write_end = ext4_da_write_end,
3284 : : .bmap = ext4_bmap,
3285 : : .invalidatepage = ext4_da_invalidatepage,
3286 : : .releasepage = ext4_releasepage,
3287 : : .direct_IO = ext4_direct_IO,
3288 : : .migratepage = buffer_migrate_page,
3289 : : .is_partially_uptodate = block_is_partially_uptodate,
3290 : : .error_remove_page = generic_error_remove_page,
3291 : : };
3292 : :
3293 : 0 : void ext4_set_aops(struct inode *inode)
3294 : : {
3295 [ + - + - ]: 341740 : switch (ext4_inode_journal_mode(inode)) {
3296 : : case EXT4_INODE_ORDERED_DATA_MODE:
3297 : : ext4_set_inode_state(inode, EXT4_STATE_ORDERED_MODE);
3298 : : break;
3299 : : case EXT4_INODE_WRITEBACK_DATA_MODE:
3300 : : ext4_clear_inode_state(inode, EXT4_STATE_ORDERED_MODE);
3301 : : break;
3302 : : case EXT4_INODE_JOURNAL_DATA_MODE:
3303 : 7555 : inode->i_mapping->a_ops = &ext4_journalled_aops;
3304 : 7555 : return;
3305 : : default:
3306 : 0 : BUG();
3307 : : }
3308 [ + - ]: 675925 : if (test_opt(inode->i_sb, DELALLOC))
3309 : 334185 : inode->i_mapping->a_ops = &ext4_da_aops;
3310 : : else
3311 : 0 : inode->i_mapping->a_ops = &ext4_aops;
3312 : : }
3313 : :
3314 : : /*
3315 : : * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
3316 : : * up to the end of the block which corresponds to `from'.
3317 : : * This required during truncate. We need to physically zero the tail end
3318 : : * of that block so it doesn't yield old data if the file is later grown.
3319 : : */
3320 : 0 : int ext4_block_truncate_page(handle_t *handle,
3321 : : struct address_space *mapping, loff_t from)
3322 : : {
3323 : 20426 : unsigned offset = from & (PAGE_CACHE_SIZE-1);
3324 : : unsigned length;
3325 : : unsigned blocksize;
3326 : 20426 : struct inode *inode = mapping->host;
3327 : :
3328 : 20426 : blocksize = inode->i_sb->s_blocksize;
3329 : 20426 : length = blocksize - (offset & (blocksize - 1));
3330 : :
3331 : 20426 : return ext4_block_zero_page_range(handle, mapping, from, length);
3332 : : }
3333 : :
3334 : : /*
3335 : : * ext4_block_zero_page_range() zeros out a mapping of length 'length'
3336 : : * starting from file offset 'from'. The range to be zero'd must
3337 : : * be contained with in one block. If the specified range exceeds
3338 : : * the end of the block it will be shortened to end of the block
3339 : : * that cooresponds to 'from'
3340 : : */
3341 : 0 : int ext4_block_zero_page_range(handle_t *handle,
3342 : 20426 : struct address_space *mapping, loff_t from, loff_t length)
3343 : : {
3344 : 20426 : ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
3345 : 20426 : unsigned offset = from & (PAGE_CACHE_SIZE-1);
3346 : : unsigned blocksize, max, pos;
3347 : : ext4_lblk_t iblock;
3348 : 20426 : struct inode *inode = mapping->host;
3349 : : struct buffer_head *bh;
3350 : : struct page *page;
3351 : : int err = 0;
3352 : :
3353 : 20426 : page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3354 : : mapping_gfp_mask(mapping) & ~__GFP_FS);
3355 [ + - ]: 20426 : if (!page)
3356 : : return -ENOMEM;
3357 : :
3358 : 20426 : blocksize = inode->i_sb->s_blocksize;
3359 : 20426 : max = blocksize - (offset & (blocksize - 1));
3360 : :
3361 : : /*
3362 : : * correct length if it does not fall between
3363 : : * 'from' and the end of the block
3364 : : */
3365 [ + - ][ - + ]: 20426 : if (length > max || length < 0)
3366 : : length = max;
3367 : :
3368 : 20426 : iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
3369 : :
3370 [ + + ]: 20426 : if (!page_has_buffers(page))
3371 : 5408 : create_empty_buffers(page, blocksize, 0);
3372 : :
3373 : : /* Find the buffer that contains "offset" */
3374 [ - + ]: 20426 : bh = page_buffers(page);
3375 : : pos = blocksize;
3376 [ - + ]: 20426 : while (offset >= pos) {
3377 : 0 : bh = bh->b_this_page;
3378 : 0 : iblock++;
3379 : 0 : pos += blocksize;
3380 : : }
3381 [ + - ]: 20426 : if (buffer_freed(bh)) {
3382 : : BUFFER_TRACE(bh, "freed: skip");
3383 : : goto unlock;
3384 : : }
3385 [ + + ]: 20426 : if (!buffer_mapped(bh)) {
3386 : : BUFFER_TRACE(bh, "unmapped");
3387 : 5485 : ext4_get_block(inode, iblock, bh, 0);
3388 : : /* unmapped? It's a hole - nothing to do */
3389 [ + ]: 5485 : if (!buffer_mapped(bh)) {
3390 : : BUFFER_TRACE(bh, "still unmapped");
3391 : : goto unlock;
3392 : : }
3393 : : }
3394 : :
3395 : : /* Ok, it's mapped. Make sure it's up-to-date */
3396 [ + - ]: 14941 : if (PageUptodate(page))
3397 : 14941 : set_buffer_uptodate(bh);
3398 : :
3399 [ - + ]: 14941 : if (!buffer_uptodate(bh)) {
3400 : : err = -EIO;
3401 : 0 : ll_rw_block(READ, 1, &bh);
3402 : 0 : wait_on_buffer(bh);
3403 : : /* Uhhuh. Read error. Complain and punt. */
3404 [ # # ]: 0 : if (!buffer_uptodate(bh))
3405 : : goto unlock;
3406 : : }
3407 [ - + ]: 14941 : if (ext4_should_journal_data(inode)) {
3408 : : BUFFER_TRACE(bh, "get write access");
3409 : 0 : err = ext4_journal_get_write_access(handle, bh);
3410 [ # # ]: 0 : if (err)
3411 : : goto unlock;
3412 : : }
3413 : 14941 : zero_user(page, offset, length);
3414 : : BUFFER_TRACE(bh, "zeroed end of block");
3415 : :
3416 [ - + ]: 14941 : if (ext4_should_journal_data(inode)) {
3417 : 0 : err = ext4_handle_dirty_metadata(handle, inode, bh);
3418 : : } else {
3419 : : err = 0;
3420 : 14941 : mark_buffer_dirty(bh);
3421 [ + - ]: 14941 : if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE))
3422 : : err = ext4_jbd2_file_inode(handle, inode);
3423 : : }
3424 : :
3425 : : unlock:
3426 : 0 : unlock_page(page);
3427 : 20426 : page_cache_release(page);
3428 : 20426 : return err;
3429 : : }
3430 : :
3431 : 0 : int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
3432 : : loff_t lstart, loff_t length)
3433 : : {
3434 : 2 : struct super_block *sb = inode->i_sb;
3435 : 2 : struct address_space *mapping = inode->i_mapping;
3436 : : unsigned partial_start, partial_end;
3437 : : ext4_fsblk_t start, end;
3438 : 2 : loff_t byte_end = (lstart + length - 1);
3439 : : int err = 0;
3440 : :
3441 : 2 : partial_start = lstart & (sb->s_blocksize - 1);
3442 : 2 : partial_end = byte_end & (sb->s_blocksize - 1);
3443 : :
3444 : 2 : start = lstart >> sb->s_blocksize_bits;
3445 : 2 : end = byte_end >> sb->s_blocksize_bits;
3446 : :
3447 : : /* Handle partial zero within the single block */
3448 [ - + ][ # # ]: 2 : if (start == end &&
3449 [ # # ]: 0 : (partial_start || (partial_end != sb->s_blocksize - 1))) {
3450 : 0 : err = ext4_block_zero_page_range(handle, mapping,
3451 : : lstart, length);
3452 : 0 : return err;
3453 : : }
3454 : : /* Handle partial zero out on the start of the range */
3455 [ - + ]: 4 : if (partial_start) {
3456 : 0 : err = ext4_block_zero_page_range(handle, mapping,
3457 : : lstart, sb->s_blocksize);
3458 [ # # ]: 0 : if (err)
3459 : : return err;
3460 : : }
3461 : : /* Handle partial zero out on the end of the range */
3462 [ - + ]: 2 : if (partial_end != sb->s_blocksize - 1)
3463 : 0 : err = ext4_block_zero_page_range(handle, mapping,
3464 : : byte_end - partial_end,
3465 : 0 : partial_end + 1);
3466 : 2 : return err;
3467 : : }
3468 : :
3469 : 0 : int ext4_can_truncate(struct inode *inode)
3470 : : {
3471 [ + + ]: 97715 : if (S_ISREG(inode->i_mode))
3472 : : return 1;
3473 [ + + ]: 48867 : if (S_ISDIR(inode->i_mode))
3474 : : return 1;
3475 [ + ]: 7223 : if (S_ISLNK(inode->i_mode))
3476 : 7223 : return !ext4_inode_is_fast_symlink(inode);
3477 : : return 0;
3478 : : }
3479 : :
3480 : : /*
3481 : : * ext4_punch_hole: punches a hole in a file by releaseing the blocks
3482 : : * associated with the given offset and length
3483 : : *
3484 : : * @inode: File inode
3485 : : * @offset: The offset where the hole will begin
3486 : : * @len: The length of the hole
3487 : : *
3488 : : * Returns: 0 on success or negative on failure
3489 : : */
3490 : :
3491 : 0 : int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3492 : : {
3493 : 4 : struct super_block *sb = inode->i_sb;
3494 : : ext4_lblk_t first_block, stop_block;
3495 : 2 : struct address_space *mapping = inode->i_mapping;
3496 : : loff_t first_block_offset, last_block_offset;
3497 : : handle_t *handle;
3498 : : unsigned int credits;
3499 : : int ret = 0;
3500 : :
3501 [ + - ]: 2 : if (!S_ISREG(inode->i_mode))
3502 : : return -EOPNOTSUPP;
3503 : :
3504 [ + - ]: 2 : if (EXT4_SB(sb)->s_cluster_ratio > 1) {
3505 : : /* TODO: Add support for bigalloc file systems */
3506 : : return -EOPNOTSUPP;
3507 : : }
3508 : :
3509 : : trace_ext4_punch_hole(inode, offset, length);
3510 : :
3511 : : /*
3512 : : * Write out all dirty pages to avoid race conditions
3513 : : * Then release them.
3514 : : */
3515 [ + - ][ + - ]: 2 : if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
3516 : 2 : ret = filemap_write_and_wait_range(mapping, offset,
3517 : 2 : offset + length - 1);
3518 [ + - ]: 2 : if (ret)
3519 : : return ret;
3520 : : }
3521 : :
3522 : 2 : mutex_lock(&inode->i_mutex);
3523 : : /* It's not possible punch hole on append only file */
3524 [ + - ]: 2 : if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
3525 : : ret = -EPERM;
3526 : : goto out_mutex;
3527 : : }
3528 [ + - ]: 2 : if (IS_SWAPFILE(inode)) {
3529 : : ret = -ETXTBSY;
3530 : : goto out_mutex;
3531 : : }
3532 : :
3533 : : /* No need to punch hole beyond i_size */
3534 [ + - ]: 2 : if (offset >= inode->i_size)
3535 : : goto out_mutex;
3536 : :
3537 : : /*
3538 : : * If the hole extends beyond i_size, set the hole
3539 : : * to end after the page that contains i_size
3540 : : */
3541 [ - + ]: 2 : if (offset + length > inode->i_size) {
3542 : 0 : length = inode->i_size +
3543 : 0 : PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
3544 : : offset;
3545 : : }
3546 : :
3547 [ + - ][ - + ]: 2 : if (offset & (sb->s_blocksize - 1) ||
3548 : 2 : (offset + length) & (sb->s_blocksize - 1)) {
3549 : : /*
3550 : : * Attach jinode to inode for jbd2 if we do any zeroing of
3551 : : * partial block
3552 : : */
3553 : 0 : ret = ext4_inode_attach_jinode(inode);
3554 [ # # ]: 0 : if (ret < 0)
3555 : : goto out_mutex;
3556 : :
3557 : : }
3558 : :
3559 : 2 : first_block_offset = round_up(offset, sb->s_blocksize);
3560 : 2 : last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
3561 : :
3562 : : /* Now release the pages and zero block aligned part of pages*/
3563 [ + - ]: 2 : if (last_block_offset > first_block_offset)
3564 : 2 : truncate_pagecache_range(inode, first_block_offset,
3565 : : last_block_offset);
3566 : :
3567 : : /* Wait all existing dio workers, newcomers will block on i_mutex */
3568 : : ext4_inode_block_unlocked_dio(inode);
3569 : 2 : inode_dio_wait(inode);
3570 : :
3571 [ + - ]: 4 : if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3572 : 2 : credits = ext4_writepage_trans_blocks(inode);
3573 : : else
3574 : : credits = ext4_blocks_for_truncate(inode);
3575 : 2 : handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
3576 [ - + ]: 2 : if (IS_ERR(handle)) {
3577 : : ret = PTR_ERR(handle);
3578 [ # # ]: 0 : ext4_std_error(sb, ret);
3579 : : goto out_dio;
3580 : : }
3581 : :
3582 : 2 : ret = ext4_zero_partial_blocks(handle, inode, offset,
3583 : : length);
3584 [ + - ]: 2 : if (ret)
3585 : : goto out_stop;
3586 : :
3587 : 4 : first_block = (offset + sb->s_blocksize - 1) >>
3588 : 2 : EXT4_BLOCK_SIZE_BITS(sb);
3589 : 2 : stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
3590 : :
3591 : : /* If there are no blocks to remove, return now */
3592 [ + - ]: 2 : if (first_block >= stop_block)
3593 : : goto out_stop;
3594 : :
3595 : 2 : down_write(&EXT4_I(inode)->i_data_sem);
3596 : 2 : ext4_discard_preallocations(inode);
3597 : :
3598 : 2 : ret = ext4_es_remove_extent(inode, first_block,
3599 : : stop_block - first_block);
3600 [ - + ]: 2 : if (ret) {
3601 : 0 : up_write(&EXT4_I(inode)->i_data_sem);
3602 : 0 : goto out_stop;
3603 : : }
3604 : :
3605 [ + - ]: 2 : if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3606 : 2 : ret = ext4_ext_remove_space(inode, first_block,
3607 : : stop_block - 1);
3608 : : else
3609 : 0 : ret = ext4_free_hole_blocks(handle, inode, first_block,
3610 : : stop_block);
3611 : :
3612 : 2 : ext4_discard_preallocations(inode);
3613 : 2 : up_write(&EXT4_I(inode)->i_data_sem);
3614 [ + - ][ - + ]: 2 : if (IS_SYNC(inode))
3615 : : ext4_handle_sync(handle);
3616 : 2 : inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
3617 : 2 : ext4_mark_inode_dirty(handle, inode);
3618 : : out_stop:
3619 : 2 : ext4_journal_stop(handle);
3620 : : out_dio:
3621 : : ext4_inode_resume_unlocked_dio(inode);
3622 : : out_mutex:
3623 : 2 : mutex_unlock(&inode->i_mutex);
3624 : 2 : return ret;
3625 : : }
3626 : :
3627 : 0 : int ext4_inode_attach_jinode(struct inode *inode)
3628 : : {
3629 : : struct ext4_inode_info *ei = EXT4_I(inode);
3630 : : struct jbd2_inode *jinode;
3631 : :
3632 [ + + ][ + - ]: 1581807 : if (ei->jinode || !EXT4_SB(inode->i_sb)->s_journal)
3633 : : return 0;
3634 : :
3635 : : jinode = jbd2_alloc_inode(GFP_KERNEL);
3636 : : spin_lock(&inode->i_lock);
3637 [ + - ]: 329204 : if (!ei->jinode) {
3638 [ - + ]: 329204 : if (!jinode) {
3639 : : spin_unlock(&inode->i_lock);
3640 : 0 : return -ENOMEM;
3641 : : }
3642 : 329204 : ei->jinode = jinode;
3643 : 329204 : jbd2_journal_init_jbd_inode(ei->jinode, inode);
3644 : : jinode = NULL;
3645 : : }
3646 : : spin_unlock(&inode->i_lock);
3647 [ - + ]: 329204 : if (unlikely(jinode != NULL))
3648 : : jbd2_free_inode(jinode);
3649 : : return 0;
3650 : : }
3651 : :
3652 : : /*
3653 : : * ext4_truncate()
3654 : : *
3655 : : * We block out ext4_get_block() block instantiations across the entire
3656 : : * transaction, and VFS/VM ensures that ext4_truncate() cannot run
3657 : : * simultaneously on behalf of the same inode.
3658 : : *
3659 : : * As we work through the truncate and commit bits of it to the journal there
3660 : : * is one core, guiding principle: the file's tree must always be consistent on
3661 : : * disk. We must be able to restart the truncate after a crash.
3662 : : *
3663 : : * The file's tree may be transiently inconsistent in memory (although it
3664 : : * probably isn't), but whenever we close off and commit a journal transaction,
3665 : : * the contents of (the filesystem + the journal) must be consistent and
3666 : : * restartable. It's pretty simple, really: bottom up, right to left (although
3667 : : * left-to-right works OK too).
3668 : : *
3669 : : * Note that at recovery time, journal replay occurs *before* the restart of
3670 : : * truncate against the orphan inode list.
3671 : : *
3672 : : * The committed inode has the new, desired i_size (which is the same as
3673 : : * i_disksize in this case). After a crash, ext4_orphan_cleanup() will see
3674 : : * that this inode's truncate did not complete and it will again call
3675 : : * ext4_truncate() to have another go. So there will be instantiated blocks
3676 : : * to the right of the truncation point in a crashed ext4 filesystem. But
3677 : : * that's fine - as long as they are linked from the inode, the post-crash
3678 : : * ext4_truncate() run will find them and release them.
3679 : : */
3680 : 0 : void ext4_truncate(struct inode *inode)
3681 : : {
3682 : : struct ext4_inode_info *ei = EXT4_I(inode);
3683 : : unsigned int credits;
3684 : : handle_t *handle;
3685 : 97715 : struct address_space *mapping = inode->i_mapping;
3686 : :
3687 : : /*
3688 : : * There is a possibility that we're either freeing the inode
3689 : : * or it completely new indode. In those cases we might not
3690 : : * have i_mutex locked because it's not necessary.
3691 : : */
3692 [ + + ]: 97715 : if (!(inode->i_state & (I_NEW|I_FREEING)))
3693 [ - + ]: 39426 : WARN_ON(!mutex_is_locked(&inode->i_mutex));
3694 : : trace_ext4_truncate_enter(inode);
3695 : :
3696 [ + + ]: 97715 : if (!ext4_can_truncate(inode))
3697 : : return;
3698 : :
3699 : : ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
3700 : :
3701 [ + + ][ + - ]: 97715 : if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
3702 : : ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
3703 : :
3704 [ - + ]: 97715 : if (ext4_has_inline_data(inode)) {
3705 : 0 : int has_inline = 1;
3706 : :
3707 : 0 : ext4_inline_data_truncate(inode, &has_inline);
3708 [ # # ]: 0 : if (has_inline)
3709 : 0 : return;
3710 : : }
3711 : :
3712 : : /* If we zero-out tail of the page, we have to create jinode for jbd2 */
3713 [ + + ]: 97715 : if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
3714 [ + ]: 20426 : if (ext4_inode_attach_jinode(inode) < 0)
3715 : : return;
3716 : : }
3717 : :
3718 [ + - ]: 195430 : if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3719 : 97715 : credits = ext4_writepage_trans_blocks(inode);
3720 : : else
3721 : : credits = ext4_blocks_for_truncate(inode);
3722 : :
3723 : 97715 : handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
3724 [ - + ]: 97715 : if (IS_ERR(handle)) {
3725 [ # # ]: 0 : ext4_std_error(inode->i_sb, PTR_ERR(handle));
3726 : : return;
3727 : : }
3728 : :
3729 [ + + ]: 97715 : if (inode->i_size & (inode->i_sb->s_blocksize - 1))
3730 : 20426 : ext4_block_truncate_page(handle, mapping, inode->i_size);
3731 : :
3732 : : /*
3733 : : * We add the inode to the orphan list, so that if this
3734 : : * truncate spans multiple transactions, and we crash, we will
3735 : : * resume the truncate when the filesystem recovers. It also
3736 : : * marks the inode dirty, to catch the new size.
3737 : : *
3738 : : * Implication: the file must always be in a sane, consistent
3739 : : * truncatable state while each transaction commits.
3740 : : */
3741 [ + - ]: 97715 : if (ext4_orphan_add(handle, inode))
3742 : : goto out_stop;
3743 : :
3744 : 97715 : down_write(&EXT4_I(inode)->i_data_sem);
3745 : :
3746 : 97715 : ext4_discard_preallocations(inode);
3747 : :
3748 [ + - ]: 97715 : if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3749 : 97715 : ext4_ext_truncate(handle, inode);
3750 : : else
3751 : 0 : ext4_ind_truncate(handle, inode);
3752 : :
3753 : 97715 : up_write(&ei->i_data_sem);
3754 : :
3755 [ + - ][ - + ]: 97715 : if (IS_SYNC(inode))
3756 : : ext4_handle_sync(handle);
3757 : :
3758 : : out_stop:
3759 : : /*
3760 : : * If this was a simple ftruncate() and the file will remain alive,
3761 : : * then we need to clear up the orphan record which we created above.
3762 : : * However, if this was a real unlink then we were called by
3763 : : * ext4_delete_inode(), and we allow that function to clean up the
3764 : : * orphan info for us.
3765 : : */
3766 [ + + ]: 97715 : if (inode->i_nlink)
3767 : 39427 : ext4_orphan_del(handle, inode);
3768 : :
3769 : 97715 : inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
3770 : 97715 : ext4_mark_inode_dirty(handle, inode);
3771 : 97715 : ext4_journal_stop(handle);
3772 : :
3773 : : trace_ext4_truncate_exit(inode);
3774 : : }
3775 : :
3776 : : /*
3777 : : * ext4_get_inode_loc returns with an extra refcount against the inode's
3778 : : * underlying buffer_head on success. If 'in_mem' is true, we have all
3779 : : * data in memory that is needed to recreate the on-disk version of this
3780 : : * inode.
3781 : : */
3782 : 0 : static int __ext4_get_inode_loc(struct inode *inode,
3783 : : struct ext4_iloc *iloc, int in_mem)
3784 : : {
3785 : : struct ext4_group_desc *gdp;
3786 : : struct buffer_head *bh;
3787 : 54285487 : struct super_block *sb = inode->i_sb;
3788 : : ext4_fsblk_t block;
3789 : : int inodes_per_block, inode_offset;
3790 : :
3791 : 10856246 : iloc->bh = NULL;
3792 [ + + ]: 10856246 : if (!ext4_valid_inum(sb, inode->i_ino))
3793 : : return -EIO;
3794 : :
3795 : 21711710 : iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
3796 : 10855855 : gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
3797 [ + + ]: 10855982 : if (!gdp)
3798 : : return -EIO;
3799 : :
3800 : : /*
3801 : : * Figure out the offset within the block group inode table
3802 : : */
3803 : 10855286 : inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
3804 : 21710572 : inode_offset = ((inode->i_ino - 1) %
3805 : 10855286 : EXT4_INODES_PER_GROUP(sb));
3806 : 10855286 : block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
3807 : 21711334 : iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
3808 : :
3809 : : bh = sb_getblk(sb, block);
3810 [ + ]: 10856291 : if (unlikely(!bh))
3811 : : return -ENOMEM;
3812 [ + + ]: 10856354 : if (!buffer_uptodate(bh)) {
3813 : : lock_buffer(bh);
3814 : :
3815 : : /*
3816 : : * If the buffer has the write error flag, we have failed
3817 : : * to write out another inode in the same block. In this
3818 : : * case, we don't have to read the block because we may
3819 : : * read the old inode data successfully.
3820 : : */
3821 [ - + ][ # # ]: 1479 : if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
3822 : : set_buffer_uptodate(bh);
3823 : :
3824 [ + + ]: 1479 : if (buffer_uptodate(bh)) {
3825 : : /* someone brought it uptodate while we waited */
3826 : 69 : unlock_buffer(bh);
3827 : 69 : goto has_buffer;
3828 : : }
3829 : :
3830 : : /*
3831 : : * If we have all information of the inode in memory and this
3832 : : * is the only valid inode in the block, we need not read the
3833 : : * block.
3834 : : */
3835 [ + + ]: 1410 : if (in_mem) {
3836 : : struct buffer_head *bitmap_bh;
3837 : : int i, start;
3838 : :
3839 : 1296 : start = inode_offset & ~(inodes_per_block - 1);
3840 : :
3841 : : /* Is the inode bitmap in cache? */
3842 : 1296 : bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
3843 [ + - ]: 1296 : if (unlikely(!bitmap_bh))
3844 : : goto make_io;
3845 : :
3846 : : /*
3847 : : * If the inode bitmap isn't in cache then the
3848 : : * optimisation may end up performing two reads instead
3849 : : * of one, so skip it.
3850 : : */
3851 [ + + ]: 1296 : if (!buffer_uptodate(bitmap_bh)) {
3852 : : brelse(bitmap_bh);
3853 : : goto make_io;
3854 : : }
3855 [ + + ]: 21122 : for (i = start; i < start + inodes_per_block; i++) {
3856 [ + + ]: 19883 : if (i == inode_offset)
3857 : 1257 : continue;
3858 [ + + ]: 18626 : if (ext4_test_bit(i, bitmap_bh->b_data))
3859 : : break;
3860 : : }
3861 : : brelse(bitmap_bh);
3862 [ + + ]: 1280 : if (i == start + inodes_per_block) {
3863 : : /* all other inodes are free, so skip I/O */
3864 [ + - ]: 1239 : memset(bh->b_data, 0, bh->b_size);
3865 : : set_buffer_uptodate(bh);
3866 : 1239 : unlock_buffer(bh);
3867 : 1239 : goto has_buffer;
3868 : : }
3869 : : }
3870 : :
3871 : : make_io:
3872 : : /*
3873 : : * If we need to do any I/O, try to pre-readahead extra
3874 : : * blocks from the inode table.
3875 : : */
3876 [ + - ]: 171 : if (EXT4_SB(sb)->s_inode_readahead_blks) {
3877 : : ext4_fsblk_t b, end, table;
3878 : : unsigned num;
3879 : : __u32 ra_blks = EXT4_SB(sb)->s_inode_readahead_blks;
3880 : :
3881 : 171 : table = ext4_inode_table(sb, gdp);
3882 : : /* s_inode_readahead_blks is always a power of 2 */
3883 : 171 : b = block & ~((ext4_fsblk_t) ra_blks - 1);
3884 [ + + ]: 10856417 : if (table > b)
3885 : : b = table;
3886 : 171 : end = b + ra_blks;
3887 : 171 : num = EXT4_INODES_PER_GROUP(sb);
3888 [ + - ]: 171 : if (ext4_has_group_desc_csum(sb))
3889 : 171 : num -= ext4_itable_unused_count(sb, gdp);
3890 : 171 : table += num / inodes_per_block;
3891 [ + + ]: 171 : if (end > table)
3892 : : end = table;
3893 [ + + ]: 5299 : while (b <= end)
3894 : 5128 : sb_breadahead(sb, b++);
3895 : : }
3896 : :
3897 : : /*
3898 : : * There are other valid inodes in the buffer, this inode
3899 : : * has in-inode xattrs, or we don't have this inode in memory.
3900 : : * Read the block from disk.
3901 : : */
3902 : : trace_ext4_load_inode(inode);
3903 : : get_bh(bh);
3904 : 171 : bh->b_end_io = end_buffer_read_sync;
3905 : 171 : submit_bh(READ | REQ_META | REQ_PRIO, bh);
3906 : : wait_on_buffer(bh);
3907 [ - + ]: 333 : if (!buffer_uptodate(bh)) {
3908 : 0 : EXT4_ERROR_INODE_BLOCK(inode, block,
3909 : : "unable to read itable block");
3910 : : brelse(bh);
3911 : : return -EIO;
3912 : : }
3913 : : }
3914 : : has_buffer:
3915 : 10856516 : iloc->bh = bh;
3916 : 10856516 : return 0;
3917 : : }
3918 : :
3919 : 0 : int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
3920 : : {
3921 : : /* We have all inode data except xattrs in memory here. */
3922 : 10850591 : return __ext4_get_inode_loc(inode, iloc,
3923 : : !ext4_test_inode_state(inode, EXT4_STATE_XATTR));
3924 : : }
3925 : :
3926 : 0 : void ext4_set_inode_flags(struct inode *inode)
3927 : : {
3928 : 471075 : unsigned int flags = EXT4_I(inode)->i_flags;
3929 : :
3930 : 471075 : inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
3931 [ - + ]: 471075 : if (flags & EXT4_SYNC_FL)
3932 : 0 : inode->i_flags |= S_SYNC;
3933 [ + + ]: 471075 : if (flags & EXT4_APPEND_FL)
3934 : 30 : inode->i_flags |= S_APPEND;
3935 [ + + ]: 471075 : if (flags & EXT4_IMMUTABLE_FL)
3936 : 24 : inode->i_flags |= S_IMMUTABLE;
3937 [ # # ]: 471075 : if (flags & EXT4_NOATIME_FL)
3938 : 0 : inode->i_flags |= S_NOATIME;
3939 [ # # ]: 0 : if (flags & EXT4_DIRSYNC_FL)
3940 : 0 : inode->i_flags |= S_DIRSYNC;
3941 : 0 : }
3942 : :
3943 : : /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
3944 : 10850671 : void ext4_get_inode_flags(struct ext4_inode_info *ei)
3945 : : {
3946 : : unsigned int vfs_fl;
3947 : : unsigned long old_fl, new_fl;
3948 : :
3949 : : do {
3950 : 10850671 : vfs_fl = ei->vfs_inode.i_flags;
3951 : 10850671 : old_fl = ei->i_flags;
3952 : 10850671 : new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
3953 : : EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
3954 : : EXT4_DIRSYNC_FL);
3955 [ - + ]: 10850671 : if (vfs_fl & S_SYNC)
3956 : 0 : new_fl |= EXT4_SYNC_FL;
3957 [ + + ]: 10850671 : if (vfs_fl & S_APPEND)
3958 : 96 : new_fl |= EXT4_APPEND_FL;
3959 [ + + ]: 10850671 : if (vfs_fl & S_IMMUTABLE)
3960 : 72 : new_fl |= EXT4_IMMUTABLE_FL;
3961 [ - + ]: 10850671 : if (vfs_fl & S_NOATIME)
3962 : 0 : new_fl |= EXT4_NOATIME_FL;
3963 [ # # ]: 10850671 : if (vfs_fl & S_DIRSYNC)
3964 : 0 : new_fl |= EXT4_DIRSYNC_FL;
3965 [ - + ]: 10850955 : } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
3966 : 10850955 : }
3967 : :
3968 : : static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
3969 : : struct ext4_inode_info *ei)
3970 : : {
3971 : : blkcnt_t i_blocks ;
3972 : : struct inode *inode = &(ei->vfs_inode);
3973 : 5535 : struct super_block *sb = inode->i_sb;
3974 : :
3975 [ + - ]: 5535 : if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3976 : : EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
3977 : : /* we are using combined 48 bit field */
3978 : 5535 : i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
3979 : : le32_to_cpu(raw_inode->i_blocks_lo);
3980 [ - + ]: 5535 : if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) {
3981 : : /* i_blocks represent file system block size */
3982 : 0 : return i_blocks << (inode->i_blkbits - 9);
3983 : : } else {
3984 : : return i_blocks;
3985 : : }
3986 : : } else {
3987 : 0 : return le32_to_cpu(raw_inode->i_blocks_lo);
3988 : : }
3989 : : }
3990 : :
3991 : : static inline void ext4_iget_extra_inode(struct inode *inode,
3992 : : struct ext4_inode *raw_inode,
3993 : : struct ext4_inode_info *ei)
3994 : : {
3995 : 5535 : __le32 *magic = (void *)raw_inode +
3996 : 5535 : EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
3997 [ - + ]: 5535 : if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
3998 : : ext4_set_inode_state(inode, EXT4_STATE_XATTR);
3999 : 0 : ext4_find_inline_data_nolock(inode);
4000 : : } else
4001 : 5535 : EXT4_I(inode)->i_inline_off = 0;
4002 : : }
4003 : :
4004 : 0 : struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4005 : : {
4006 : : struct ext4_iloc iloc;
4007 : 5535 : struct ext4_inode *raw_inode;
4008 : 5535 : struct ext4_inode_info *ei;
4009 : : struct inode *inode;
4010 : 7235 : journal_t *journal = EXT4_SB(sb)->s_journal;
4011 : : long ret;
4012 : : int block;
4013 : : uid_t i_uid;
4014 : : gid_t i_gid;
4015 : :
4016 : 7235 : inode = iget_locked(sb, ino);
4017 [ + - ]: 7235 : if (!inode)
4018 : : return ERR_PTR(-ENOMEM);
4019 [ + + ]: 7235 : if (!(inode->i_state & I_NEW))
4020 : : return inode;
4021 : :
4022 : 5535 : ei = EXT4_I(inode);
4023 : 5535 : iloc.bh = NULL;
4024 : :
4025 : 5535 : ret = __ext4_get_inode_loc(inode, &iloc, 0);
4026 [ + - ]: 5535 : if (ret < 0)
4027 : : goto bad_inode;
4028 : 5535 : raw_inode = ext4_raw_inode(&iloc);
4029 : :
4030 [ + - ]: 5535 : if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
4031 : 5535 : ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
4032 [ - + ]: 5535 : if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
4033 : 5535 : EXT4_INODE_SIZE(inode->i_sb)) {
4034 : 0 : EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)",
4035 : : EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize,
4036 : : EXT4_INODE_SIZE(inode->i_sb));
4037 : : ret = -EIO;
4038 : 0 : goto bad_inode;
4039 : : }
4040 : : } else
4041 : 0 : ei->i_extra_isize = 0;
4042 : :
4043 : : /* Precompute checksum seed for inode metadata */
4044 [ - + ]: 5535 : if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
4045 : : EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
4046 : 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4047 : : __u32 csum;
4048 : 0 : __le32 inum = cpu_to_le32(inode->i_ino);
4049 : 0 : __le32 gen = raw_inode->i_generation;
4050 : 0 : csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
4051 : : sizeof(inum));
4052 : 0 : ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
4053 : : sizeof(gen));
4054 : : }
4055 : :
4056 [ - + ]: 5535 : if (!ext4_inode_csum_verify(inode, raw_inode, ei)) {
4057 : 0 : EXT4_ERROR_INODE(inode, "checksum invalid");
4058 : : ret = -EIO;
4059 : 0 : goto bad_inode;
4060 : : }
4061 : :
4062 : 5535 : inode->i_mode = le16_to_cpu(raw_inode->i_mode);
4063 : 5535 : i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
4064 : 5535 : i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
4065 [ + - ]: 5535 : if (!(test_opt(inode->i_sb, NO_UID32))) {
4066 : 5535 : i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
4067 : 5535 : i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
4068 : : }
4069 : : i_uid_write(inode, i_uid);
4070 : : i_gid_write(inode, i_gid);
4071 : 5535 : set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
4072 : :
4073 : : ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
4074 : 5535 : ei->i_inline_off = 0;
4075 : 5535 : ei->i_dir_start_lookup = 0;
4076 : 5535 : ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
4077 : : /* We now have enough fields to check if the inode was active or not.
4078 : : * This is needed because nfsd might try to access dead inodes
4079 : : * the test is that same one that e2fsck uses
4080 : : * NeilBrown 1999oct15
4081 : : */
4082 [ - + ]: 5535 : if (inode->i_nlink == 0) {
4083 [ # # ][ # # ]: 0 : if ((inode->i_mode == 0 ||
4084 [ # # ]: 0 : !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
4085 : : ino != EXT4_BOOT_LOADER_INO) {
4086 : : /* this inode is deleted */
4087 : : ret = -ESTALE;
4088 : : goto bad_inode;
4089 : : }
4090 : : /* The only unlinked inodes we let through here have
4091 : : * valid i_mode and are being read by the orphan
4092 : : * recovery code: that's fine, we're about to complete
4093 : : * the process of deleting those.
4094 : : * OR it is the EXT4_BOOT_LOADER_INO which is
4095 : : * not initialized on a new filesystem. */
4096 : : }
4097 : 5535 : ei->i_flags = le32_to_cpu(raw_inode->i_flags);
4098 : 5535 : inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
4099 : 5535 : ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
4100 [ - + ]: 5535 : if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
4101 : 0 : ei->i_file_acl |=
4102 : 0 : ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
4103 : 5535 : inode->i_size = ext4_isize(raw_inode);
4104 : 5535 : ei->i_disksize = inode->i_size;
4105 : : #ifdef CONFIG_QUOTA
4106 : 5535 : ei->i_reserved_quota = 0;
4107 : : #endif
4108 : 5535 : inode->i_generation = le32_to_cpu(raw_inode->i_generation);
4109 : 5535 : ei->i_block_group = iloc.block_group;
4110 : 5535 : ei->i_last_alloc_group = ~0;
4111 : : /*
4112 : : * NOTE! The in-memory inode i_data array is in little-endian order
4113 : : * even on big-endian machines: we do NOT byteswap the block numbers!
4114 : : */
4115 [ + + ]: 88560 : for (block = 0; block < EXT4_N_BLOCKS; block++)
4116 : 83025 : ei->i_data[block] = raw_inode->i_block[block];
4117 : 5535 : INIT_LIST_HEAD(&ei->i_orphan);
4118 : :
4119 : : /*
4120 : : * Set transaction id's of transactions that have to be committed
4121 : : * to finish f[data]sync. We set them to currently running transaction
4122 : : * as we cannot be sure that the inode or some of its metadata isn't
4123 : : * part of the transaction - the inode could have been reclaimed and
4124 : : * now it is reread from disk.
4125 : : */
4126 [ + - ]: 5535 : if (journal) {
4127 : : transaction_t *transaction;
4128 : : tid_t tid;
4129 : :
4130 : 5535 : read_lock(&journal->j_state_lock);
4131 [ + + ]: 5535 : if (journal->j_running_transaction)
4132 : : transaction = journal->j_running_transaction;
4133 : : else
4134 : 152 : transaction = journal->j_committing_transaction;
4135 [ + + ]: 5535 : if (transaction)
4136 : 5410 : tid = transaction->t_tid;
4137 : : else
4138 : 125 : tid = journal->j_commit_sequence;
4139 : : read_unlock(&journal->j_state_lock);
4140 : 5535 : ei->i_sync_tid = tid;
4141 : 5535 : ei->i_datasync_tid = tid;
4142 : : }
4143 : :
4144 [ + - ]: 5535 : if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
4145 [ - + ]: 5535 : if (ei->i_extra_isize == 0) {
4146 : : /* The extra space is currently unused. Use it. */
4147 : 0 : ei->i_extra_isize = sizeof(struct ext4_inode) -
4148 : : EXT4_GOOD_OLD_INODE_SIZE;
4149 : : } else {
4150 : : ext4_iget_extra_inode(inode, raw_inode, ei);
4151 : : }
4152 : : }
4153 : :
4154 [ + - ]: 5535 : EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
4155 [ + - ]: 5535 : EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
4156 [ + - ]: 5535 : EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
4157 [ + - ][ + - ]: 5535 : EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
4158 : :
4159 : 5535 : inode->i_version = le32_to_cpu(raw_inode->i_disk_version);
4160 [ + - ]: 5535 : if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
4161 [ + - ]: 5535 : if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
4162 : 5535 : inode->i_version |=
4163 : 5535 : (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
4164 : : }
4165 : :
4166 : : ret = 0;
4167 [ - + # # ]: 5535 : if (ei->i_file_acl &&
4168 : 0 : !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
4169 : 0 : EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
4170 : : ei->i_file_acl);
4171 : : ret = -EIO;
4172 : 0 : goto bad_inode;
4173 [ + - ]: 5535 : } else if (!ext4_has_inline_data(inode)) {
4174 [ + + ]: 5535 : if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
4175 [ - + ][ # # ]: 5405 : if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
4176 [ # # ]: 0 : (S_ISLNK(inode->i_mode) &&
4177 : : !ext4_inode_is_fast_symlink(inode))))
4178 : : /* Validate extent which is part of inode */
4179 : 5405 : ret = ext4_ext_check_inode(inode);
4180 [ + - ][ + - ]: 130 : } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
4181 [ - + ]: 130 : (S_ISLNK(inode->i_mode) &&
4182 : : !ext4_inode_is_fast_symlink(inode))) {
4183 : : /* Validate block references which are part of inode */
4184 : 0 : ret = ext4_ind_check_inode(inode);
4185 : : }
4186 : : }
4187 [ + - ]: 5535 : if (ret)
4188 : : goto bad_inode;
4189 : :
4190 [ + + ]: 5535 : if (S_ISREG(inode->i_mode)) {
4191 : 4961 : inode->i_op = &ext4_file_inode_operations;
4192 : 4961 : inode->i_fop = &ext4_file_operations;
4193 : 4961 : ext4_set_aops(inode);
4194 [ + + ]: 574 : } else if (S_ISDIR(inode->i_mode)) {
4195 : 444 : inode->i_op = &ext4_dir_inode_operations;
4196 : 444 : inode->i_fop = &ext4_dir_operations;
4197 [ + - ]: 130 : } else if (S_ISLNK(inode->i_mode)) {
4198 [ + - ]: 130 : if (ext4_inode_is_fast_symlink(inode)) {
4199 : 130 : inode->i_op = &ext4_fast_symlink_inode_operations;
4200 : 130 : nd_terminate_link(ei->i_data, inode->i_size,
4201 : : sizeof(ei->i_data) - 1);
4202 : : } else {
4203 : 0 : inode->i_op = &ext4_symlink_inode_operations;
4204 : 0 : ext4_set_aops(inode);
4205 : : }
4206 [ # # ][ # # ]: 0 : } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
4207 [ # # ]: 0 : S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
4208 : 0 : inode->i_op = &ext4_special_inode_operations;
4209 [ # # ]: 0 : if (raw_inode->i_block[0])
4210 : 0 : init_special_inode(inode, inode->i_mode,
4211 : : old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
4212 : : else
4213 : 0 : init_special_inode(inode, inode->i_mode,
4214 : : new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
4215 [ # # ]: 0 : } else if (ino == EXT4_BOOT_LOADER_INO) {
4216 : 0 : make_bad_inode(inode);
4217 : : } else {
4218 : : ret = -EIO;
4219 : 0 : EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
4220 : 0 : goto bad_inode;
4221 : : }
4222 : 5535 : brelse(iloc.bh);
4223 : 5535 : ext4_set_inode_flags(inode);
4224 : 5535 : unlock_new_inode(inode);
4225 : 5535 : return inode;
4226 : :
4227 : : bad_inode:
4228 : 0 : brelse(iloc.bh);
4229 : 0 : iget_failed(inode);
4230 : 0 : return ERR_PTR(ret);
4231 : : }
4232 : :
4233 : 10850655 : static int ext4_inode_blocks_set(handle_t *handle,
4234 : : struct ext4_inode *raw_inode,
4235 : : struct ext4_inode_info *ei)
4236 : : {
4237 : : struct inode *inode = &(ei->vfs_inode);
4238 : 10850655 : u64 i_blocks = inode->i_blocks;
4239 : 10850655 : struct super_block *sb = inode->i_sb;
4240 : :
4241 [ + - ]: 10850655 : if (i_blocks <= ~0U) {
4242 : : /*
4243 : : * i_blocks can be represented in a 32 bit variable
4244 : : * as multiple of 512 bytes
4245 : : */
4246 : 10850655 : raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
4247 : 10850655 : raw_inode->i_blocks_high = 0;
4248 : : ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
4249 : : return 0;
4250 : : }
4251 [ # # ]: 0 : if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
4252 : : return -EFBIG;
4253 : :
4254 [ # # ]: 0 : if (i_blocks <= 0xffffffffffffULL) {
4255 : : /*
4256 : : * i_blocks can be represented in a 48 bit variable
4257 : : * as multiple of 512 bytes
4258 : : */
4259 : 0 : raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
4260 : 0 : raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
4261 : : ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
4262 : : } else {
4263 : : ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
4264 : : /* i_block is stored in file system block size */
4265 : 0 : i_blocks = i_blocks >> (inode->i_blkbits - 9);
4266 : 0 : raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
4267 : 0 : raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
4268 : : }
4269 : : return 0;
4270 : : }
4271 : :
4272 : : /*
4273 : : * Post the struct inode info into an on-disk inode location in the
4274 : : * buffer-cache. This gobbles the caller's reference to the
4275 : : * buffer_head in the inode location struct.
4276 : : *
4277 : : * The caller must have write access to iloc->bh.
4278 : : */
4279 : 0 : static int ext4_do_update_inode(handle_t *handle,
4280 : 10850748 : struct inode *inode,
4281 : 10850399 : struct ext4_iloc *iloc)
4282 : : {
4283 : : struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
4284 : 10850399 : struct ext4_inode_info *ei = EXT4_I(inode);
4285 : : struct buffer_head *bh = iloc->bh;
4286 : : int err = 0, rc, block;
4287 : : int need_datasync = 0;
4288 : : uid_t i_uid;
4289 : : gid_t i_gid;
4290 : :
4291 : : /* For fields not not tracking in the in-memory inode,
4292 : : * initialise them to zero for new inodes. */
4293 [ + + ]: 10850399 : if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
4294 [ + + ]: 465418 : memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
4295 : :
4296 : 10850402 : ext4_get_inode_flags(ei);
4297 : 10850748 : raw_inode->i_mode = cpu_to_le16(inode->i_mode);
4298 : : i_uid = i_uid_read(inode);
4299 : : i_gid = i_gid_read(inode);
4300 [ + - ]: 10850748 : if (!(test_opt(inode->i_sb, NO_UID32))) {
4301 : 10850748 : raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
4302 : 10850748 : raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
4303 : : /*
4304 : : * Fix up interoperability with old kernels. Otherwise, old inodes get
4305 : : * re-used with the upper 16 bits of the uid/gid intact
4306 : : */
4307 [ + + ]: 10850748 : if (!ei->i_dtime) {
4308 : 10312264 : raw_inode->i_uid_high =
4309 : 10312264 : cpu_to_le16(high_16_bits(i_uid));
4310 : 10312264 : raw_inode->i_gid_high =
4311 : 10312264 : cpu_to_le16(high_16_bits(i_gid));
4312 : : } else {
4313 : 538484 : raw_inode->i_uid_high = 0;
4314 : 538484 : raw_inode->i_gid_high = 0;
4315 : : }
4316 : : } else {
4317 [ # # ]: 0 : raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid));
4318 [ # # ]: 0 : raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid));
4319 : 0 : raw_inode->i_uid_high = 0;
4320 : 0 : raw_inode->i_gid_high = 0;
4321 : : }
4322 : 10850748 : raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
4323 : :
4324 [ + ]: 10850748 : EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
4325 [ + ]: 10850748 : EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
4326 [ + ]: 10850748 : EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
4327 [ + ][ + ]: 10850748 : EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
4328 : :
4329 [ + + ]: 10850748 : if (ext4_inode_blocks_set(handle, raw_inode, ei))
4330 : : goto out_brelse;
4331 : 10850241 : raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
4332 : 10850241 : raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
4333 [ + ]: 10850241 : if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
4334 : : cpu_to_le32(EXT4_OS_HURD))
4335 : 10850252 : raw_inode->i_file_acl_high =
4336 : 10850252 : cpu_to_le16(ei->i_file_acl >> 32);
4337 : 10850241 : raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
4338 [ + + ]: 10850241 : if (ei->i_disksize != ext4_isize(raw_inode)) {
4339 : : ext4_isize_set(raw_inode, ei->i_disksize);
4340 : : need_datasync = 1;
4341 : : }
4342 [ - + ]: 10850241 : if (ei->i_disksize > 0x7fffffffULL) {
4343 : 0 : struct super_block *sb = inode->i_sb;
4344 [ # # ]: 0 : if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
4345 [ # # ]: 0 : EXT4_FEATURE_RO_COMPAT_LARGE_FILE) ||
4346 : 0 : EXT4_SB(sb)->s_es->s_rev_level ==
4347 : : cpu_to_le32(EXT4_GOOD_OLD_REV)) {
4348 : : /* If this is the first large file
4349 : : * created, add a flag to the superblock.
4350 : : */
4351 : 0 : err = ext4_journal_get_write_access(handle,
4352 : : EXT4_SB(sb)->s_sbh);
4353 [ # # ]: 0 : if (err)
4354 : : goto out_brelse;
4355 : 0 : ext4_update_dynamic_rev(sb);
4356 : 0 : EXT4_SET_RO_COMPAT_FEATURE(sb,
4357 : : EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
4358 : : ext4_handle_sync(handle);
4359 : 0 : err = ext4_handle_dirty_super(handle, sb);
4360 : : }
4361 : : }
4362 : 10850778 : raw_inode->i_generation = cpu_to_le32(inode->i_generation);
4363 [ + + ]: 10850778 : if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
4364 [ + - ]: 21 : if (old_valid_dev(inode->i_rdev)) {
4365 : 21 : raw_inode->i_block[0] =
4366 : 21 : cpu_to_le32(old_encode_dev(inode->i_rdev));
4367 : 21 : raw_inode->i_block[1] = 0;
4368 : : } else {
4369 : 0 : raw_inode->i_block[0] = 0;
4370 : 0 : raw_inode->i_block[1] =
4371 : 0 : cpu_to_le32(new_encode_dev(inode->i_rdev));
4372 : 0 : raw_inode->i_block[2] = 0;
4373 : : }
4374 [ + + ]: 10850757 : } else if (!ext4_has_inline_data(inode)) {
4375 [ + + ]: 173592134 : for (block = 0; block < EXT4_N_BLOCKS; block++)
4376 : 162742284 : raw_inode->i_block[block] = ei->i_data[block];
4377 : : }
4378 : :
4379 : 10850880 : raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
4380 [ + + ]: 10850880 : if (ei->i_extra_isize) {
4381 [ + ]: 10850330 : if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
4382 : 10850399 : raw_inode->i_version_hi =
4383 : 10850399 : cpu_to_le32(inode->i_version >> 32);
4384 : 10850330 : raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
4385 : : }
4386 : :
4387 : 10850880 : ext4_inode_csum_set(inode, raw_inode, ei);
4388 : :
4389 : : BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
4390 : 10850347 : rc = ext4_handle_dirty_metadata(handle, NULL, bh);
4391 [ + + ]: 10851070 : if (!err)
4392 : : err = rc;
4393 : : ext4_clear_inode_state(inode, EXT4_STATE_NEW);
4394 : :
4395 : : ext4_update_inode_fsync_trans(handle, inode, need_datasync);
4396 : : out_brelse:
4397 : : brelse(bh);
4398 [ - + ]: 10850995 : ext4_std_error(inode->i_sb, err);
4399 : 10850995 : return err;
4400 : : }
4401 : :
4402 : : /*
4403 : : * ext4_write_inode()
4404 : : *
4405 : : * We are called from a few places:
4406 : : *
4407 : : * - Within generic_file_write() for O_SYNC files.
4408 : : * Here, there will be no transaction running. We wait for any running
4409 : : * transaction to commit.
4410 : : *
4411 : : * - Within sys_sync(), kupdate and such.
4412 : : * We wait on commit, if tol to.
4413 : : *
4414 : : * - Within prune_icache() (PF_MEMALLOC == true)
4415 : : * Here we simply return. We can't afford to block kswapd on the
4416 : : * journal commit.
4417 : : *
4418 : : * In all cases it is actually safe for us to return without doing anything,
4419 : : * because the inode has been copied into a raw inode buffer in
4420 : : * ext4_mark_inode_dirty(). This is a correctness thing for O_SYNC and for
4421 : : * knfsd.
4422 : : *
4423 : : * Note that we are absolutely dependent upon all inode dirtiers doing the
4424 : : * right thing: they *must* call mark_inode_dirty() after dirtying info in
4425 : : * which we are interested.
4426 : : *
4427 : : * It would be a bug for them to not do this. The code:
4428 : : *
4429 : : * mark_inode_dirty(inode)
4430 : : * stuff();
4431 : : * inode->i_size = expr;
4432 : : *
4433 : : * is in error because a kswapd-driven write_inode() could occur while
4434 : : * `stuff()' is running, and the new i_size will be lost. Plus the inode
4435 : : * will no longer be on the superblock's dirty inode list.
4436 : : */
4437 : 0 : int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
4438 : : {
4439 : : int err;
4440 : :
4441 [ + - ]: 38757 : if (current->flags & PF_MEMALLOC)
4442 : : return 0;
4443 : :
4444 [ + - ]: 38757 : if (EXT4_SB(inode->i_sb)->s_journal) {
4445 [ - + ]: 38757 : if (ext4_journal_current_handle()) {
4446 : : jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
4447 : 0 : dump_stack();
4448 : 0 : return -EIO;
4449 : : }
4450 : :
4451 [ + + ]: 38757 : if (wbc->sync_mode != WB_SYNC_ALL)
4452 : : return 0;
4453 : :
4454 : 23 : err = ext4_force_commit(inode->i_sb);
4455 : : } else {
4456 : : struct ext4_iloc iloc;
4457 : :
4458 : 0 : err = __ext4_get_inode_loc(inode, &iloc, 0);
4459 [ # # ]: 0 : if (err)
4460 : 0 : return err;
4461 [ # # ]: 0 : if (wbc->sync_mode == WB_SYNC_ALL)
4462 : 0 : sync_dirty_buffer(iloc.bh);
4463 [ # # ][ # # ]: 38757 : if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
4464 : 0 : EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
4465 : : "IO error syncing inode");
4466 : : err = -EIO;
4467 : : }
4468 : 0 : brelse(iloc.bh);
4469 : : }
4470 : 23 : return err;
4471 : : }
4472 : :
4473 : : /*
4474 : : * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
4475 : : * buffers that are attached to a page stradding i_size and are undergoing
4476 : : * commit. In that case we have to wait for commit to finish and try again.
4477 : : */
4478 : 0 : static void ext4_wait_for_tail_page_commit(struct inode *inode)
4479 : : {
4480 : : struct page *page;
4481 : : unsigned offset;
4482 : 0 : journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
4483 : : tid_t commit_tid = 0;
4484 : : int ret;
4485 : :
4486 : 0 : offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
4487 : : /*
4488 : : * All buffers in the last page remain valid? Then there's nothing to
4489 : : * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
4490 : : * blocksize case
4491 : : */
4492 [ # # ]: 0 : if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
4493 : : return;
4494 : : while (1) {
4495 : 0 : page = find_lock_page(inode->i_mapping,
4496 : 0 : inode->i_size >> PAGE_CACHE_SHIFT);
4497 [ # # ]: 0 : if (!page)
4498 : : return;
4499 : 0 : ret = __ext4_journalled_invalidatepage(page, offset,
4500 : 0 : PAGE_CACHE_SIZE - offset);
4501 : 0 : unlock_page(page);
4502 : 0 : page_cache_release(page);
4503 [ # # ]: 0 : if (ret != -EBUSY)
4504 : : return;
4505 : : commit_tid = 0;
4506 : 0 : read_lock(&journal->j_state_lock);
4507 [ # # ]: 0 : if (journal->j_committing_transaction)
4508 : 0 : commit_tid = journal->j_committing_transaction->t_tid;
4509 : : read_unlock(&journal->j_state_lock);
4510 [ # # ]: 0 : if (commit_tid)
4511 : 0 : jbd2_log_wait_commit(journal, commit_tid);
4512 : : }
4513 : : }
4514 : :
4515 : : /*
4516 : : * ext4_setattr()
4517 : : *
4518 : : * Called from notify_change.
4519 : : *
4520 : : * We want to trap VFS attempts to truncate the file as soon as
4521 : : * possible. In particular, we want to make sure that when the VFS
4522 : : * shrinks i_size, we put the inode on the orphan list and modify
4523 : : * i_disksize immediately, so that during the subsequent flushing of
4524 : : * dirty pages and freeing of disk blocks, we can guarantee that any
4525 : : * commit will leave the blocks being flushed in an unused state on
4526 : : * disk. (On recovery, the inode will get truncated and the blocks will
4527 : : * be freed, so we have a strong guarantee that no future commit will
4528 : : * leave these blocks visible to the user.)
4529 : : *
4530 : : * Another thing we have to assure is that if we are in ordered mode
4531 : : * and inode is still attached to the committing transaction, we must
4532 : : * we start writeout of all the dirty pages which are being truncated.
4533 : : * This way we are sure that all the data written in the previous
4534 : : * transaction are already on disk (truncate waits for pages under
4535 : : * writeback).
4536 : : *
4537 : : * Called with inode->i_mutex down.
4538 : : */
4539 : 0 : int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4540 : : {
4541 : 94043 : struct inode *inode = dentry->d_inode;
4542 : : int error, rc = 0;
4543 : : int orphan = 0;
4544 : 61681 : const unsigned int ia_valid = attr->ia_valid;
4545 : :
4546 : 61681 : error = inode_change_ok(inode, attr);
4547 [ + + ]: 61681 : if (error)
4548 : : return error;
4549 : :
4550 [ + + ]: 61664 : if (is_quota_modification(inode, attr))
4551 : 34493 : dquot_initialize(inode);
4552 [ + + ][ + + ]: 123347 : if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
[ + + ]
4553 [ + + ]: 3243 : (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
4554 : : handle_t *handle;
4555 : :
4556 : : /* (user+group)*(old+new) structure, inode write (sb,
4557 : : * inode block, ? - but truncate inode update has it) */
4558 [ + - ][ - + ]: 166 : handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
[ # # ][ + - ]
[ - + ]
4559 : : (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) +
4560 : : EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)) + 3);
4561 [ - + ]: 166 : if (IS_ERR(handle)) {
4562 : : error = PTR_ERR(handle);
4563 : 0 : goto err_out;
4564 : : }
4565 : 166 : error = dquot_transfer(inode, attr);
4566 [ - + ]: 166 : if (error) {
4567 : 0 : ext4_journal_stop(handle);
4568 : 0 : return error;
4569 : : }
4570 : : /* Update corresponding info in inode so that everything is in
4571 : : * one transaction */
4572 [ + + ]: 166 : if (attr->ia_valid & ATTR_UID)
4573 : 154 : inode->i_uid = attr->ia_uid;
4574 [ + + ]: 166 : if (attr->ia_valid & ATTR_GID)
4575 : 136 : inode->i_gid = attr->ia_gid;
4576 : 166 : error = ext4_mark_inode_dirty(handle, inode);
4577 : 166 : ext4_journal_stop(handle);
4578 : : }
4579 : :
4580 [ + + ][ + + ]: 61666 : if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
4581 : : handle_t *handle;
4582 : :
4583 [ - + ]: 34327 : if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4584 : 0 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4585 : :
4586 [ # # ]: 0 : if (attr->ia_size > sbi->s_bitmap_maxbytes)
4587 : : return -EFBIG;
4588 : : }
4589 [ + - ][ + + ]: 34326 : if (S_ISREG(inode->i_mode) &&
4590 : : (attr->ia_size < inode->i_size)) {
4591 [ + ]: 32355 : if (ext4_should_order_data(inode)) {
4592 : : error = ext4_begin_ordered_truncate(inode,
4593 : : attr->ia_size);
4594 [ + - ]: 32363 : if (error)
4595 : : goto err_out;
4596 : : }
4597 : : handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
4598 [ - + ]: 32362 : if (IS_ERR(handle)) {
4599 : : error = PTR_ERR(handle);
4600 : 0 : goto err_out;
4601 : : }
4602 [ + ]: 32362 : if (ext4_handle_valid(handle)) {
4603 : 32363 : error = ext4_orphan_add(handle, inode);
4604 : : orphan = 1;
4605 : : }
4606 : 32362 : down_write(&EXT4_I(inode)->i_data_sem);
4607 : 32363 : EXT4_I(inode)->i_disksize = attr->ia_size;
4608 : 32363 : rc = ext4_mark_inode_dirty(handle, inode);
4609 [ + - ]: 32363 : if (!error)
4610 : : error = rc;
4611 : : /*
4612 : : * We have to update i_size under i_data_sem together
4613 : : * with i_disksize to avoid races with writeback code
4614 : : * running ext4_wb_update_i_disksize().
4615 : : */
4616 [ + - ]: 32363 : if (!error)
4617 : 32363 : i_size_write(inode, attr->ia_size);
4618 : 32363 : up_write(&EXT4_I(inode)->i_data_sem);
4619 : 32363 : ext4_journal_stop(handle);
4620 [ - + ]: 32363 : if (error) {
4621 : 0 : ext4_orphan_del(NULL, inode);
4622 : 0 : goto err_out;
4623 : : }
4624 : : } else
4625 : : i_size_write(inode, attr->ia_size);
4626 : :
4627 : : /*
4628 : : * Blocks are going to be removed from the inode. Wait
4629 : : * for dio in flight. Temporarily disable
4630 : : * dioread_nolock to prevent livelock.
4631 : : */
4632 [ + + ]: 34327 : if (orphan) {
4633 [ + - ]: 32363 : if (!ext4_should_journal_data(inode)) {
4634 : : ext4_inode_block_unlocked_dio(inode);
4635 : 32363 : inode_dio_wait(inode);
4636 : : ext4_inode_resume_unlocked_dio(inode);
4637 : : } else
4638 : 0 : ext4_wait_for_tail_page_commit(inode);
4639 : : }
4640 : : /*
4641 : : * Truncate pagecache after we've waited for commit
4642 : : * in data=journal mode to make pages freeable.
4643 : : */
4644 : 34327 : truncate_pagecache(inode, inode->i_size);
4645 : : }
4646 : : /*
4647 : : * We want to call ext4_truncate() even if attr->ia_size ==
4648 : : * inode->i_size for cases like truncation of fallocated space
4649 : : */
4650 [ + + ]: 61665 : if (attr->ia_valid & ATTR_SIZE)
4651 : 39427 : ext4_truncate(inode);
4652 : :
4653 [ + - ]: 61665 : if (!rc) {
4654 : 61665 : setattr_copy(inode, attr);
4655 : : mark_inode_dirty(inode);
4656 : : }
4657 : :
4658 : : /*
4659 : : * If the call to ext4_truncate failed to get a transaction handle at
4660 : : * all, we need to clean up the in-core orphan list manually.
4661 : : */
4662 [ + + ][ + - ]: 61665 : if (orphan && inode->i_nlink)
4663 : 32363 : ext4_orphan_del(NULL, inode);
4664 : :
4665 [ + - ][ + + ]: 61665 : if (!rc && (ia_valid & ATTR_MODE))
4666 : : rc = ext4_acl_chmod(inode);
4667 : :
4668 : : err_out:
4669 [ - + ]: 61665 : ext4_std_error(inode->i_sb, error);
4670 [ + - ]: 61665 : if (!error)
4671 : : error = rc;
4672 : 61665 : return error;
4673 : : }
4674 : :
4675 : 0 : int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
4676 : : struct kstat *stat)
4677 : : {
4678 : : struct inode *inode;
4679 : : unsigned long long delalloc_blocks;
4680 : :
4681 : 4689544 : inode = dentry->d_inode;
4682 : 4689544 : generic_fillattr(inode, stat);
4683 : :
4684 : : /*
4685 : : * If there is inline data in the inode, the inode will normally not
4686 : : * have data blocks allocated (it may have an external xattr block).
4687 : : * Report at least one sector for such files, so tools like tar, rsync,
4688 : : * others doen't incorrectly think the file is completely sparse.
4689 : : */
4690 [ - + ]: 4689477 : if (unlikely(ext4_has_inline_data(inode)))
4691 : 0 : stat->blocks += (stat->size + 511) >> 9;
4692 : :
4693 : : /*
4694 : : * We can't update i_blocks if the block allocation is delayed
4695 : : * otherwise in the case of system crash before the real block
4696 : : * allocation is done, we will have i_blocks inconsistent with
4697 : : * on-disk file blocks.
4698 : : * We always keep i_blocks updated together with real
4699 : : * allocation. But to not confuse with user, stat
4700 : : * will return the blocks that include the delayed allocation
4701 : : * blocks for this file.
4702 : : */
4703 : 0 : delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb),
4704 : : EXT4_I(inode)->i_reserved_data_blocks);
4705 : 0 : stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9);
4706 : 0 : return 0;
4707 : : }
4708 : :
4709 : 0 : static int ext4_index_trans_blocks(struct inode *inode, int lblocks,
4710 : : int pextents)
4711 : : {
4712 [ - + ]: 474624 : if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4713 : 0 : return ext4_ind_trans_blocks(inode, lblocks);
4714 : 474624 : return ext4_ext_index_trans_blocks(inode, pextents);
4715 : : }
4716 : :
4717 : : /*
4718 : : * Account for index blocks, block groups bitmaps and block group
4719 : : * descriptor blocks if modify datablocks and index blocks
4720 : : * worse case, the indexs blocks spread over different block groups
4721 : : *
4722 : : * If datablocks are discontiguous, they are possible to spread over
4723 : : * different block groups too. If they are contiguous, with flexbg,
4724 : : * they could still across block group boundary.
4725 : : *
4726 : : * Also account for superblock, inode, quota and xattr blocks
4727 : : */
4728 : 0 : static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
4729 : : int pextents)
4730 : : {
4731 : 949226 : ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
4732 : : int gdpblocks;
4733 : : int idxblocks;
4734 : : int ret = 0;
4735 : :
4736 : : /*
4737 : : * How many index blocks need to touch to map @lblocks logical blocks
4738 : : * to @pextents physical extents?
4739 : : */
4740 : 474617 : idxblocks = ext4_index_trans_blocks(inode, lblocks, pextents);
4741 : :
4742 : : ret = idxblocks;
4743 : :
4744 : : /*
4745 : : * Now let's see how many group bitmaps and group descriptors need
4746 : : * to account
4747 : : */
4748 : 474617 : groups = idxblocks + pextents;
4749 : : gdpblocks = groups;
4750 [ - + ]: 474617 : if (groups > ngroups)
4751 : : groups = ngroups;
4752 [ + + ]: 474617 : if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
4753 : 129081 : gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
4754 : :
4755 : : /* bitmaps and block group descriptor blocks */
4756 : 8 : ret += groups + gdpblocks;
4757 : :
4758 : : /* Blocks for super block, inode, quota and xattr blocks */
4759 [ + ][ + ]: 474617 : ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
4760 : :
4761 : 8 : return ret;
4762 : : }
4763 : :
4764 : : /*
4765 : : * Calculate the total number of credits to reserve to fit
4766 : : * the modification of a single pages into a single transaction,
4767 : : * which may include multiple chunks of block allocations.
4768 : : *
4769 : : * This could be called via ext4_write_begin()
4770 : : *
4771 : : * We need to consider the worse case, when
4772 : : * one new block per extent.
4773 : : */
4774 : 0 : int ext4_writepage_trans_blocks(struct inode *inode)
4775 : : {
4776 : : int bpp = ext4_journal_blocks_per_page(inode);
4777 : : int ret;
4778 : :
4779 : 105272 : ret = ext4_meta_trans_blocks(inode, bpp, bpp);
4780 : :
4781 : : /* Account for data blocks for journalled mode */
4782 [ + + ]: 105272 : if (ext4_should_journal_data(inode))
4783 : 56422 : ret += bpp;
4784 : 1 : return ret;
4785 : : }
4786 : :
4787 : : /*
4788 : : * Calculate the journal credits for a chunk of data modification.
4789 : : *
4790 : : * This is called from DIO, fallocate or whoever calling
4791 : : * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks.
4792 : : *
4793 : : * journal buffers for data blocks are not included here, as DIO
4794 : : * and fallocate do no need to journal data buffers.
4795 : : */
4796 : 0 : int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
4797 : : {
4798 : 214636 : return ext4_meta_trans_blocks(inode, nrblocks, 1);
4799 : : }
4800 : :
4801 : : /*
4802 : : * The caller must have previously called ext4_reserve_inode_write().
4803 : : * Give this, we know that the caller already has write access to iloc->bh.
4804 : : */
4805 : 0 : int ext4_mark_iloc_dirty(handle_t *handle,
4806 : : struct inode *inode, struct ext4_iloc *iloc)
4807 : : {
4808 : : int err = 0;
4809 : :
4810 [ - + ]: 10850550 : if (IS_I_VERSION(inode))
4811 : : inode_inc_iversion(inode);
4812 : :
4813 : : /* the do_update_inode consumes one bh->b_count */
4814 : 0 : get_bh(iloc->bh);
4815 : :
4816 : : /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */
4817 : 10850384 : err = ext4_do_update_inode(handle, inode, iloc);
4818 : 10850761 : put_bh(iloc->bh);
4819 : 10851055 : return err;
4820 : : }
4821 : :
4822 : : /*
4823 : : * On success, We end up with an outstanding reference count against
4824 : : * iloc->bh. This _must_ be cleaned up later.
4825 : : */
4826 : :
4827 : : int
4828 : 0 : ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
4829 : : struct ext4_iloc *iloc)
4830 : : {
4831 : : int err;
4832 : :
4833 : : err = ext4_get_inode_loc(inode, iloc);
4834 [ + + ]: 10850838 : if (!err) {
4835 : : BUFFER_TRACE(iloc->bh, "get_write_access");
4836 : 10850036 : err = ext4_journal_get_write_access(handle, iloc->bh);
4837 [ - + ]: 10851053 : if (err) {
4838 : 0 : brelse(iloc->bh);
4839 : 0 : iloc->bh = NULL;
4840 : : }
4841 : : }
4842 [ - + ]: 10851855 : ext4_std_error(inode->i_sb, err);
4843 : 1264 : return err;
4844 : : }
4845 : :
4846 : : /*
4847 : : * Expand an inode by new_extra_isize bytes.
4848 : : * Returns 0 on success or negative error number on failure.
4849 : : */
4850 : 0 : static int ext4_expand_extra_isize(struct inode *inode,
4851 : : unsigned int new_extra_isize,
4852 : : struct ext4_iloc iloc,
4853 : : handle_t *handle)
4854 : : {
4855 : : struct ext4_inode *raw_inode;
4856 : : struct ext4_xattr_ibody_header *header;
4857 : :
4858 [ # # ]: 0 : if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
4859 : : return 0;
4860 : :
4861 : 0 : raw_inode = ext4_raw_inode(&iloc);
4862 : :
4863 : 0 : header = IHDR(inode, raw_inode);
4864 : :
4865 : : /* No extended attributes present */
4866 [ # # ][ # # ]: 0 : if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
4867 : 0 : header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
4868 [ # # ]: 0 : memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
4869 : : new_extra_isize);
4870 : 0 : EXT4_I(inode)->i_extra_isize = new_extra_isize;
4871 : : return 0;
4872 : : }
4873 : :
4874 : : /* try to expand with EAs present */
4875 : 0 : return ext4_expand_extra_isize_ea(inode, new_extra_isize,
4876 : : raw_inode, handle);
4877 : : }
4878 : :
4879 : : /*
4880 : : * What we do here is to mark the in-core inode as clean with respect to inode
4881 : : * dirtiness (it may still be data-dirty).
4882 : : * This means that the in-core inode may be reaped by prune_icache
4883 : : * without having to perform any I/O. This is a very good thing,
4884 : : * because *any* task may call prune_icache - even ones which
4885 : : * have a transaction open against a different journal.
4886 : : *
4887 : : * Is this cheating? Not really. Sure, we haven't written the
4888 : : * inode out, but prune_icache isn't a user-visible syncing function.
4889 : : * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
4890 : : * we start and wait on commits.
4891 : : */
4892 : 0 : int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
4893 : : {
4894 : 0 : struct ext4_iloc iloc;
4895 : 9697292 : struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4896 : : static unsigned int mnt_count;
4897 : : int err, ret;
4898 : :
4899 : : might_sleep();
4900 : 9697292 : trace_ext4_mark_inode_dirty(inode, _RET_IP_);
4901 : 9697127 : err = ext4_reserve_inode_write(handle, inode, &iloc);
4902 [ + + ][ - + ]: 9698003 : if (ext4_handle_valid(handle) &&
4903 [ # # ]: 0 : EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
4904 : : !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
4905 : : /*
4906 : : * We need extra buffer credits since we may write into EA block
4907 : : * with this same handle. If journal_extend fails, then it will
4908 : : * only result in a minor loss of functionality for that inode.
4909 : : * If this is felt to be critical, then e2fsck should be run to
4910 : : * force a large enough s_min_extra_isize.
4911 : : */
4912 [ # # ]: 0 : if ((jbd2_journal_extend(handle,
4913 [ # # ][ # # ]: 0 : EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) {
[ # # ]
4914 : 0 : ret = ext4_expand_extra_isize(inode,
4915 : : sbi->s_want_extra_isize,
4916 : : iloc, handle);
4917 [ # # ]: 0 : if (ret) {
4918 : : ext4_set_inode_state(inode,
4919 : : EXT4_STATE_NO_EXPAND);
4920 [ # # ]: 0 : if (mnt_count !=
4921 : 0 : le16_to_cpu(sbi->s_es->s_mnt_count)) {
4922 : 0 : ext4_warning(inode->i_sb,
4923 : : "Unable to expand inode %lu. Delete"
4924 : : " some EAs or run e2fsck.",
4925 : : inode->i_ino);
4926 : 0 : mnt_count =
4927 : 0 : le16_to_cpu(sbi->s_es->s_mnt_count);
4928 : : }
4929 : : }
4930 : : }
4931 : : }
4932 [ + + ]: 9698003 : if (!err)
4933 : 9697981 : err = ext4_mark_iloc_dirty(handle, inode, &iloc);
4934 : 605 : return err;
4935 : : }
4936 : :
4937 : : /*
4938 : : * ext4_dirty_inode() is called from __mark_inode_dirty()
4939 : : *
4940 : : * We're really interested in the case where a file is being extended.
4941 : : * i_size has been changed by generic_commit_write() and we thus need
4942 : : * to include the updated inode in the current transaction.
4943 : : *
4944 : : * Also, dquot_alloc_block() will always dirty the inode when blocks
4945 : : * are allocated to the file.
4946 : : *
4947 : : * If the inode is marked synchronous, we don't honour that here - doing
4948 : : * so would cause a commit on atime updates, which we don't bother doing.
4949 : : * We handle synchronous inodes at the highest possible level.
4950 : : */
4951 : 0 : void ext4_dirty_inode(struct inode *inode, int flags)
4952 : : {
4953 : : handle_t *handle;
4954 : :
4955 : : handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
4956 [ + + ]: 4882956 : if (IS_ERR(handle))
4957 : : goto out;
4958 : :
4959 : 4882892 : ext4_mark_inode_dirty(handle, inode);
4960 : :
4961 : 4883007 : ext4_journal_stop(handle);
4962 : : out:
4963 : 175 : return;
4964 : : }
4965 : :
4966 : : #if 0
4967 : : /*
4968 : : * Bind an inode's backing buffer_head into this transaction, to prevent
4969 : : * it from being flushed to disk early. Unlike
4970 : : * ext4_reserve_inode_write, this leaves behind no bh reference and
4971 : : * returns no iloc structure, so the caller needs to repeat the iloc
4972 : : * lookup to mark the inode dirty later.
4973 : : */
4974 : : static int ext4_pin_inode(handle_t *handle, struct inode *inode)
4975 : : {
4976 : : struct ext4_iloc iloc;
4977 : :
4978 : : int err = 0;
4979 : : if (handle) {
4980 : : err = ext4_get_inode_loc(inode, &iloc);
4981 : : if (!err) {
4982 : : BUFFER_TRACE(iloc.bh, "get_write_access");
4983 : : err = jbd2_journal_get_write_access(handle, iloc.bh);
4984 : : if (!err)
4985 : : err = ext4_handle_dirty_metadata(handle,
4986 : : NULL,
4987 : : iloc.bh);
4988 : : brelse(iloc.bh);
4989 : : }
4990 : : }
4991 : : ext4_std_error(inode->i_sb, err);
4992 : : return err;
4993 : : }
4994 : : #endif
4995 : :
4996 : 0 : int ext4_change_inode_journal_flag(struct inode *inode, int val)
4997 : : {
4998 : 0 : journal_t *journal;
4999 : : handle_t *handle;
5000 : : int err;
5001 : :
5002 : : /*
5003 : : * We have to be very careful here: changing a data block's
5004 : : * journaling status dynamically is dangerous. If we write a
5005 : : * data block to the journal, change the status and then delete
5006 : : * that block, we risk forgetting to revoke the old log record
5007 : : * from the journal and so a subsequent replay can corrupt data.
5008 : : * So, first we make sure that the journal is empty and that
5009 : : * nobody is changing anything.
5010 : : */
5011 : :
5012 : 0 : journal = EXT4_JOURNAL(inode);
5013 [ # # ]: 0 : if (!journal)
5014 : : return 0;
5015 [ # # ]: 0 : if (is_journal_aborted(journal))
5016 : : return -EROFS;
5017 : : /* We have to allocate physical blocks for delalloc blocks
5018 : : * before flushing journal. otherwise delalloc blocks can not
5019 : : * be allocated any more. even more truncate on delalloc blocks
5020 : : * could trigger BUG by flushing delalloc blocks in journal.
5021 : : * There is no delalloc block in non-journal data mode.
5022 : : */
5023 [ # # ][ # # ]: 0 : if (val && test_opt(inode->i_sb, DELALLOC)) {
5024 : 0 : err = ext4_alloc_da_blocks(inode);
5025 [ # # ]: 0 : if (err < 0)
5026 : : return err;
5027 : : }
5028 : :
5029 : : /* Wait for all existing dio workers */
5030 : : ext4_inode_block_unlocked_dio(inode);
5031 : 0 : inode_dio_wait(inode);
5032 : :
5033 : 0 : jbd2_journal_lock_updates(journal);
5034 : :
5035 : : /*
5036 : : * OK, there are no updates running now, and all cached data is
5037 : : * synced to disk. We are now in a completely consistent state
5038 : : * which doesn't have anything in the journal, and we know that
5039 : : * no filesystem updates are running, so it is safe to modify
5040 : : * the inode's in-core data-journaling state flag now.
5041 : : */
5042 : :
5043 [ # # ]: 0 : if (val)
5044 : : ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
5045 : : else {
5046 : 0 : jbd2_journal_flush(journal);
5047 : : ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
5048 : : }
5049 : 0 : ext4_set_aops(inode);
5050 : :
5051 : 0 : jbd2_journal_unlock_updates(journal);
5052 : : ext4_inode_resume_unlocked_dio(inode);
5053 : :
5054 : : /* Finally we can mark the inode as dirty. */
5055 : :
5056 : : handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
5057 [ # # ]: 0 : if (IS_ERR(handle))
5058 : 0 : return PTR_ERR(handle);
5059 : :
5060 : 0 : err = ext4_mark_inode_dirty(handle, inode);
5061 : : ext4_handle_sync(handle);
5062 : 0 : ext4_journal_stop(handle);
5063 [ # # ]: 0 : ext4_std_error(inode->i_sb, err);
5064 : :
5065 : 0 : return err;
5066 : : }
5067 : :
5068 : 0 : static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
5069 : : {
5070 : 0 : return !buffer_mapped(bh);
5071 : : }
5072 : :
5073 : 0 : int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5074 : : {
5075 : 264358 : struct page *page = vmf->page;
5076 : : loff_t size;
5077 : : unsigned long len;
5078 : : int ret;
5079 : 264358 : struct file *file = vma->vm_file;
5080 : 0 : struct inode *inode = file_inode(file);
5081 : 264358 : struct address_space *mapping = inode->i_mapping;
5082 : : handle_t *handle;
5083 : : get_block_t *get_block;
5084 : 264358 : int retries = 0;
5085 : :
5086 : 528725 : sb_start_pagefault(inode->i_sb);
5087 : 264362 : file_update_time(vma->vm_file);
5088 : : /* Delalloc case is easy... */
5089 [ + ][ + + ]: 528735 : if (test_opt(inode->i_sb, DELALLOC) &&
5090 [ + + ]: 264368 : !ext4_should_journal_data(inode) &&
5091 : 264362 : !ext4_nonda_switch(inode->i_sb)) {
5092 : : do {
5093 : 264342 : ret = __block_page_mkwrite(vma, vmf,
5094 : : ext4_da_get_block_prep);
5095 [ # # ]: 0 : } while (ret == -ENOSPC &&
5096 [ - + ]: 264352 : ext4_should_retry_alloc(inode->i_sb, &retries));
5097 : : goto out_ret;
5098 : : }
5099 : :
5100 : : lock_page(page);
5101 : : size = i_size_read(inode);
5102 : : /* Page got truncated from under us? */
5103 [ # # ][ # # ]: 0 : if (page->mapping != mapping || page_offset(page) > size) {
5104 : 0 : unlock_page(page);
5105 : : ret = VM_FAULT_NOPAGE;
5106 : 0 : goto out;
5107 : : }
5108 : :
5109 [ # # ]: 0 : if (page->index == size >> PAGE_CACHE_SHIFT)
5110 : 0 : len = size & ~PAGE_CACHE_MASK;
5111 : : else
5112 : : len = PAGE_CACHE_SIZE;
5113 : : /*
5114 : : * Return if we have all the buffers mapped. This avoids the need to do
5115 : : * journal_start/journal_stop which can block and take a long time
5116 : : */
5117 [ # # ]: 0 : if (page_has_buffers(page)) {
5118 [ # # ][ # # ]: 0 : if (!ext4_walk_page_buffers(NULL, page_buffers(page),
5119 : : 0, len, NULL,
5120 : : ext4_bh_unmapped)) {
5121 : : /* Wait so that we don't change page under IO */
5122 : 0 : wait_for_stable_page(page);
5123 : : ret = VM_FAULT_LOCKED;
5124 : 0 : goto out;
5125 : : }
5126 : : }
5127 : 0 : unlock_page(page);
5128 : : /* OK, we need to fill the hole... */
5129 [ # # ]: 0 : if (ext4_should_dioread_nolock(inode))
5130 : : get_block = ext4_get_block_write;
5131 : : else
5132 : : get_block = ext4_get_block;
5133 : : retry_alloc:
5134 : 0 : handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
5135 : : ext4_writepage_trans_blocks(inode));
5136 [ # # ]: 0 : if (IS_ERR(handle)) {
5137 : : ret = VM_FAULT_SIGBUS;
5138 : : goto out;
5139 : : }
5140 : 0 : ret = __block_page_mkwrite(vma, vmf, get_block);
5141 [ # # ][ # # ]: 0 : if (!ret && ext4_should_journal_data(inode)) {
5142 [ # # ][ # # ]: 0 : if (ext4_walk_page_buffers(handle, page_buffers(page), 0,
5143 : : PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
5144 : 0 : unlock_page(page);
5145 : : ret = VM_FAULT_SIGBUS;
5146 : 0 : ext4_journal_stop(handle);
5147 : 0 : goto out;
5148 : : }
5149 : : ext4_set_inode_state(inode, EXT4_STATE_JDATA);
5150 : : }
5151 : 0 : ext4_journal_stop(handle);
5152 [ # # ][ # # ]: 0 : if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
5153 : : goto retry_alloc;
5154 : : out_ret:
5155 : : ret = block_page_mkwrite_return(ret);
5156 : : out:
5157 : 264352 : sb_end_pagefault(inode->i_sb);
5158 : 264324 : return ret;
5159 : : }
|