Branch data Line data Source code
1 : : /*
2 : : * linux/fs/ext3/balloc.c
3 : : *
4 : : * Copyright (C) 1992, 1993, 1994, 1995
5 : : * Remy Card (card@masi.ibp.fr)
6 : : * Laboratoire MASI - Institut Blaise Pascal
7 : : * Universite Pierre et Marie Curie (Paris VI)
8 : : *
9 : : * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10 : : * Big-endian to little-endian byte-swapping/bitmaps by
11 : : * David S. Miller (davem@caip.rutgers.edu), 1995
12 : : */
13 : :
14 : : #include <linux/quotaops.h>
15 : : #include <linux/blkdev.h>
16 : : #include "ext3.h"
17 : :
18 : : /*
19 : : * balloc.c contains the blocks allocation and deallocation routines
20 : : */
21 : :
22 : : /*
23 : : * The free blocks are managed by bitmaps. A file system contains several
24 : : * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
25 : : * block for inodes, N blocks for the inode table and data blocks.
26 : : *
27 : : * The file system contains group descriptors which are located after the
28 : : * super block. Each descriptor contains the number of the bitmap block and
29 : : * the free blocks count in the block. The descriptors are loaded in memory
30 : : * when a file system is mounted (see ext3_fill_super).
31 : : */
32 : :
33 : :
34 : : #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
35 : :
36 : : /*
37 : : * Calculate the block group number and offset, given a block number
38 : : */
39 : 0 : static void ext3_get_group_no_and_offset(struct super_block *sb,
40 : : ext3_fsblk_t blocknr, unsigned long *blockgrpp, ext3_grpblk_t *offsetp)
41 : : {
42 : 0 : struct ext3_super_block *es = EXT3_SB(sb)->s_es;
43 : :
44 : 0 : blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
45 : : if (offsetp)
46 : 0 : *offsetp = blocknr % EXT3_BLOCKS_PER_GROUP(sb);
47 : : if (blockgrpp)
48 : 0 : *blockgrpp = blocknr / EXT3_BLOCKS_PER_GROUP(sb);
49 : : }
50 : :
51 : : /**
52 : : * ext3_get_group_desc() -- load group descriptor from disk
53 : : * @sb: super block
54 : : * @block_group: given block group
55 : : * @bh: pointer to the buffer head to store the block
56 : : * group descriptor
57 : : */
58 : 0 : struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
59 : : unsigned int block_group,
60 : : struct buffer_head ** bh)
61 : : {
62 : : unsigned long group_desc;
63 : : unsigned long offset;
64 : : struct ext3_group_desc * desc;
65 : : struct ext3_sb_info *sbi = EXT3_SB(sb);
66 : :
67 [ - + ]: 189 : if (block_group >= sbi->s_groups_count) {
68 : 0 : ext3_error (sb, "ext3_get_group_desc",
69 : : "block_group >= groups_count - "
70 : : "block_group = %d, groups_count = %lu",
71 : : block_group, sbi->s_groups_count);
72 : :
73 : 0 : return NULL;
74 : : }
75 : 189 : smp_rmb();
76 : :
77 : 378 : group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
78 : 378 : offset = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
79 [ - + ]: 378 : if (!sbi->s_group_desc[group_desc]) {
80 : 0 : ext3_error (sb, "ext3_get_group_desc",
81 : : "Group descriptor not loaded - "
82 : : "block_group = %d, group_desc = %lu, desc = %lu",
83 : : block_group, group_desc, offset);
84 : 0 : return NULL;
85 : : }
86 : :
87 : 189 : desc = (struct ext3_group_desc *) sbi->s_group_desc[group_desc]->b_data;
88 [ + + ]: 189 : if (bh)
89 : 23 : *bh = sbi->s_group_desc[group_desc];
90 : 189 : return desc + offset;
91 : : }
92 : :
93 : 1 : static int ext3_valid_block_bitmap(struct super_block *sb,
94 : : struct ext3_group_desc *desc,
95 : : unsigned int block_group,
96 : : struct buffer_head *bh)
97 : : {
98 : : ext3_grpblk_t offset;
99 : : ext3_grpblk_t next_zero_bit;
100 : : ext3_fsblk_t bitmap_blk;
101 : : ext3_fsblk_t group_first_block;
102 : :
103 : : group_first_block = ext3_group_first_block_no(sb, block_group);
104 : :
105 : : /* check whether block bitmap block number is set */
106 : 1 : bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
107 : 1 : offset = bitmap_blk - group_first_block;
108 [ + - ]: 1 : if (!ext3_test_bit(offset, bh->b_data))
109 : : /* bad block bitmap */
110 : : goto err_out;
111 : :
112 : : /* check whether the inode bitmap block number is set */
113 : 1 : bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap);
114 : 1 : offset = bitmap_blk - group_first_block;
115 [ + - ]: 1 : if (!ext3_test_bit(offset, bh->b_data))
116 : : /* bad block bitmap */
117 : : goto err_out;
118 : :
119 : : /* check whether the inode table block number is set */
120 : 1 : bitmap_blk = le32_to_cpu(desc->bg_inode_table);
121 : 1 : offset = bitmap_blk - group_first_block;
122 : 1 : next_zero_bit = ext3_find_next_zero_bit(bh->b_data,
123 : 1 : offset + EXT3_SB(sb)->s_itb_per_group,
124 : : offset);
125 [ - + ]: 1 : if (next_zero_bit >= offset + EXT3_SB(sb)->s_itb_per_group)
126 : : /* good bitmap for inode tables */
127 : : return 1;
128 : :
129 : : err_out:
130 : 0 : ext3_error(sb, __func__,
131 : : "Invalid block bitmap - "
132 : : "block_group = %d, block = %lu",
133 : : block_group, bitmap_blk);
134 : : return 0;
135 : : }
136 : :
137 : : /**
138 : : * read_block_bitmap()
139 : : * @sb: super block
140 : : * @block_group: given block group
141 : : *
142 : : * Read the bitmap for a given block_group,and validate the
143 : : * bits for block/inode/inode tables are set in the bitmaps
144 : : *
145 : : * Return buffer_head on success or NULL in case of failure.
146 : : */
147 : : static struct buffer_head *
148 : 0 : read_block_bitmap(struct super_block *sb, unsigned int block_group)
149 : : {
150 : : struct ext3_group_desc * desc;
151 : 1 : struct buffer_head * bh = NULL;
152 : : ext3_fsblk_t bitmap_blk;
153 : :
154 : 11 : desc = ext3_get_group_desc(sb, block_group, NULL);
155 [ + - ]: 11 : if (!desc)
156 : : return NULL;
157 : : trace_ext3_read_block_bitmap(sb, block_group);
158 : 0 : bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
159 : 0 : bh = sb_getblk(sb, bitmap_blk);
160 [ - + ]: 11 : if (unlikely(!bh)) {
161 : 0 : ext3_error(sb, __func__,
162 : : "Cannot read block bitmap - "
163 : : "block_group = %d, block_bitmap = %u",
164 : : block_group, le32_to_cpu(desc->bg_block_bitmap));
165 : 0 : return NULL;
166 : : }
167 [ + + ]: 11 : if (likely(bh_uptodate_or_lock(bh)))
168 : : return bh;
169 : :
170 [ - + ]: 1 : if (bh_submit_read(bh) < 0) {
171 : : brelse(bh);
172 : 0 : ext3_error(sb, __func__,
173 : : "Cannot read block bitmap - "
174 : : "block_group = %d, block_bitmap = %u",
175 : : block_group, le32_to_cpu(desc->bg_block_bitmap));
176 : 0 : return NULL;
177 : : }
178 : 1 : ext3_valid_block_bitmap(sb, desc, block_group, bh);
179 : : /*
180 : : * file system mounted not to panic on error, continue with corrupt
181 : : * bitmap
182 : : */
183 : 1 : return bh;
184 : : }
185 : : /*
186 : : * The reservation window structure operations
187 : : * --------------------------------------------
188 : : * Operations include:
189 : : * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
190 : : *
191 : : * We use a red-black tree to represent per-filesystem reservation
192 : : * windows.
193 : : *
194 : : */
195 : :
196 : : /**
197 : : * __rsv_window_dump() -- Dump the filesystem block allocation reservation map
198 : : * @rb_root: root of per-filesystem reservation rb tree
199 : : * @verbose: verbose mode
200 : : * @fn: function which wishes to dump the reservation map
201 : : *
202 : : * If verbose is turned on, it will print the whole block reservation
203 : : * windows(start, end). Otherwise, it will only print out the "bad" windows,
204 : : * those windows that overlap with their immediate neighbors.
205 : : */
206 : : #if 1
207 : 0 : static void __rsv_window_dump(struct rb_root *root, int verbose,
208 : : const char *fn)
209 : : {
210 : : struct rb_node *n;
211 : : struct ext3_reserve_window_node *rsv, *prev;
212 : : int bad;
213 : :
214 : : restart:
215 : 0 : n = rb_first(root);
216 : : bad = 0;
217 : : prev = NULL;
218 : :
219 : 0 : printk("Block Allocation Reservation Windows Map (%s):\n", fn);
220 [ # # ]: 0 : while (n) {
221 : : rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
222 [ # # ]: 0 : if (verbose)
223 : 0 : printk("reservation window 0x%p "
224 : : "start: %lu, end: %lu\n",
225 : : rsv, rsv->rsv_start, rsv->rsv_end);
226 [ # # ][ # # ]: 0 : if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
227 : 0 : printk("Bad reservation %p (start >= end)\n",
228 : : rsv);
229 : : bad = 1;
230 : : }
231 [ # # ][ # # ]: 0 : if (prev && prev->rsv_end >= rsv->rsv_start) {
232 : 0 : printk("Bad reservation %p (prev->end >= start)\n",
233 : : rsv);
234 : : bad = 1;
235 : : }
236 [ # # ]: 0 : if (bad) {
237 [ # # ]: 0 : if (!verbose) {
238 : 0 : printk("Restarting reservation walk in verbose mode\n");
239 : : verbose = 1;
240 : 0 : goto restart;
241 : : }
242 : : }
243 : 0 : n = rb_next(n);
244 : : prev = rsv;
245 : : }
246 : 0 : printk("Window map complete.\n");
247 [ # # ]: 0 : BUG_ON(bad);
248 : 0 : }
249 : : #define rsv_window_dump(root, verbose) \
250 : : __rsv_window_dump((root), (verbose), __func__)
251 : : #else
252 : : #define rsv_window_dump(root, verbose) do {} while (0)
253 : : #endif
254 : :
255 : : /**
256 : : * goal_in_my_reservation()
257 : : * @rsv: inode's reservation window
258 : : * @grp_goal: given goal block relative to the allocation block group
259 : : * @group: the current allocation block group
260 : : * @sb: filesystem super block
261 : : *
262 : : * Test if the given goal block (group relative) is within the file's
263 : : * own block reservation window range.
264 : : *
265 : : * If the reservation window is outside the goal allocation group, return 0;
266 : : * grp_goal (given goal block) could be -1, which means no specific
267 : : * goal block. In this case, always return 1.
268 : : * If the goal block is within the reservation window, return 1;
269 : : * otherwise, return 0;
270 : : */
271 : : static int
272 : 0 : goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
273 : : unsigned int group, struct super_block * sb)
274 : : {
275 : : ext3_fsblk_t group_first_block, group_last_block;
276 : :
277 : : group_first_block = ext3_group_first_block_no(sb, group);
278 : 6 : group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
279 : :
280 [ + - ][ + - ]: 6 : if ((rsv->_rsv_start > group_last_block) ||
281 : 6 : (rsv->_rsv_end < group_first_block))
282 : : return 0;
283 [ + - ][ + + ]: 6 : if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start)
284 [ + - ]: 3 : || (grp_goal + group_first_block > rsv->_rsv_end)))
285 : : return 0;
286 : 3 : return 1;
287 : : }
288 : :
289 : : /**
290 : : * search_reserve_window()
291 : : * @rb_root: root of reservation tree
292 : : * @goal: target allocation block
293 : : *
294 : : * Find the reserved window which includes the goal, or the previous one
295 : : * if the goal is not in any window.
296 : : * Returns NULL if there are no windows or if all windows start after the goal.
297 : : */
298 : : static struct ext3_reserve_window_node *
299 : 0 : search_reserve_window(struct rb_root *root, ext3_fsblk_t goal)
300 : : {
301 : 3 : struct rb_node *n = root->rb_node;
302 : : struct ext3_reserve_window_node *rsv;
303 : :
304 [ + ]: 3 : if (!n)
305 : : return NULL;
306 : :
307 : : do {
308 : : rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
309 : :
310 [ - + ]: 6 : if (goal < rsv->rsv_start)
311 : 0 : n = n->rb_left;
312 [ + - ]: 3 : else if (goal > rsv->rsv_end)
313 : 3 : n = n->rb_right;
314 : : else
315 : : return rsv;
316 [ - + ]: 3 : } while (n);
317 : : /*
318 : : * We've fallen off the end of the tree: the goal wasn't inside
319 : : * any particular node. OK, the previous node must be to one
320 : : * side of the interval containing the goal. If it's the RHS,
321 : : * we need to back up one.
322 : : */
323 [ - + ]: 3 : if (rsv->rsv_start > goal) {
324 : 0 : n = rb_prev(&rsv->rsv_node);
325 : : rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
326 : : }
327 : 3 : return rsv;
328 : : }
329 : :
330 : : /**
331 : : * ext3_rsv_window_add() -- Insert a window to the block reservation rb tree.
332 : : * @sb: super block
333 : : * @rsv: reservation window to add
334 : : *
335 : : * Must be called with rsv_lock hold.
336 : : */
337 : 0 : void ext3_rsv_window_add(struct super_block *sb,
338 : : struct ext3_reserve_window_node *rsv)
339 : : {
340 : 5 : struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root;
341 : 5 : struct rb_node *node = &rsv->rsv_node;
342 : 5 : ext3_fsblk_t start = rsv->rsv_start;
343 : :
344 : 5 : struct rb_node ** p = &root->rb_node;
345 : : struct rb_node * parent = NULL;
346 : : struct ext3_reserve_window_node *this;
347 : :
348 : : trace_ext3_rsv_window_add(sb, rsv);
349 [ + + ]: 8 : while (*p)
350 : : {
351 : : parent = *p;
352 : : this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node);
353 : :
354 [ - + ]: 3 : if (start < this->rsv_start)
355 : 0 : p = &(*p)->rb_left;
356 [ + - ]: 3 : else if (start > this->rsv_end)
357 : 3 : p = &(*p)->rb_right;
358 : : else {
359 : 0 : rsv_window_dump(root, 1);
360 : 3 : BUG();
361 : : }
362 : : }
363 : :
364 : : rb_link_node(node, parent, p);
365 : 5 : rb_insert_color(node, root);
366 : 5 : }
367 : :
368 : : /**
369 : : * ext3_rsv_window_remove() -- unlink a window from the reservation rb tree
370 : : * @sb: super block
371 : : * @rsv: reservation window to remove
372 : : *
373 : : * Mark the block reservation window as not allocated, and unlink it
374 : : * from the filesystem reservation window rb tree. Must be called with
375 : : * rsv_lock hold.
376 : : */
377 : 0 : static void rsv_window_remove(struct super_block *sb,
378 : : struct ext3_reserve_window_node *rsv)
379 : : {
380 : 0 : rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
381 : 0 : rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
382 : 0 : rsv->rsv_alloc_hit = 0;
383 : 3 : rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root);
384 : : }
385 : :
386 : : /*
387 : : * rsv_is_empty() -- Check if the reservation window is allocated.
388 : : * @rsv: given reservation window to check
389 : : *
390 : : * returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED.
391 : : */
392 : : static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
393 : : {
394 : : /* a valid reservation end block could not be 0 */
395 : : return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
396 : : }
397 : :
398 : : /**
399 : : * ext3_init_block_alloc_info()
400 : : * @inode: file inode structure
401 : : *
402 : : * Allocate and initialize the reservation window structure, and
403 : : * link the window to the ext3 inode structure at last
404 : : *
405 : : * The reservation window structure is only dynamically allocated
406 : : * and linked to ext3 inode the first time the open file
407 : : * needs a new block. So, before every ext3_new_block(s) call, for
408 : : * regular files, we should check whether the reservation window
409 : : * structure exists or not. In the latter case, this function is called.
410 : : * Fail to do so will result in block reservation being turned off for that
411 : : * open file.
412 : : *
413 : : * This function is called from ext3_get_blocks_handle(), also called
414 : : * when setting the reservation window size through ioctl before the file
415 : : * is open for write (needs block allocation).
416 : : *
417 : : * Needs truncate_mutex protection prior to call this function.
418 : : */
419 : 0 : void ext3_init_block_alloc_info(struct inode *inode)
420 : : {
421 : : struct ext3_inode_info *ei = EXT3_I(inode);
422 : : struct ext3_block_alloc_info *block_i;
423 : 6 : struct super_block *sb = inode->i_sb;
424 : :
425 : : block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
426 [ + ]: 3 : if (block_i) {
427 : : struct ext3_reserve_window_node *rsv = &block_i->rsv_window_node;
428 : :
429 : 3 : rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
430 : 3 : rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
431 : :
432 : : /*
433 : : * if filesystem is mounted with NORESERVATION, the goal
434 : : * reservation window size is set to zero to indicate
435 : : * block reservation is off
436 : : */
437 [ - + ]: 6 : if (!test_opt(sb, RESERVATION))
438 : 0 : rsv->rsv_goal_size = 0;
439 : : else
440 : 3 : rsv->rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS;
441 : 3 : rsv->rsv_alloc_hit = 0;
442 : 3 : block_i->last_alloc_logical_block = 0;
443 : 3 : block_i->last_alloc_physical_block = 0;
444 : : }
445 : 0 : ei->i_block_alloc_info = block_i;
446 : 0 : }
447 : :
448 : : /**
449 : : * ext3_discard_reservation()
450 : : * @inode: inode
451 : : *
452 : : * Discard(free) block reservation window on last file close, or truncate
453 : : * or at last iput().
454 : : *
455 : : * It is being called in three cases:
456 : : * ext3_release_file(): last writer close the file
457 : : * ext3_clear_inode(): last iput(), when nobody link to this file.
458 : : * ext3_truncate(): when the block indirect map is about to change.
459 : : *
460 : : */
461 : 0 : void ext3_discard_reservation(struct inode *inode)
462 : : {
463 : : struct ext3_inode_info *ei = EXT3_I(inode);
464 : 19 : struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
465 : : struct ext3_reserve_window_node *rsv;
466 : 19 : spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock;
467 : :
468 [ + + ]: 19 : if (!block_i)
469 : 19 : return;
470 : :
471 : 6 : rsv = &block_i->rsv_window_node;
472 [ + + ]: 6 : if (!rsv_is_empty(&rsv->rsv_window)) {
473 : : spin_lock(rsv_lock);
474 [ + - ]: 3 : if (!rsv_is_empty(&rsv->rsv_window)) {
475 : : trace_ext3_discard_reservation(inode, rsv);
476 : 0 : rsv_window_remove(inode->i_sb, rsv);
477 : : }
478 : : spin_unlock(rsv_lock);
479 : : }
480 : : }
481 : :
482 : : /**
483 : : * ext3_free_blocks_sb() -- Free given blocks and update quota
484 : : * @handle: handle to this transaction
485 : : * @sb: super block
486 : : * @block: start physical block to free
487 : : * @count: number of blocks to free
488 : : * @pdquot_freed_blocks: pointer to quota
489 : : */
490 : 0 : void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
491 : : ext3_fsblk_t block, unsigned long count,
492 : : unsigned long *pdquot_freed_blocks)
493 : : {
494 : 7 : struct buffer_head *bitmap_bh = NULL;
495 : : struct buffer_head *gd_bh;
496 : : unsigned long block_group;
497 : : ext3_grpblk_t bit;
498 : : unsigned long i;
499 : : unsigned long overflow;
500 : : struct ext3_group_desc * desc;
501 : : struct ext3_super_block * es;
502 : 8 : struct ext3_sb_info *sbi;
503 : : int err = 0, ret;
504 : : ext3_grpblk_t group_freed;
505 : :
506 : 4 : *pdquot_freed_blocks = 0;
507 : : sbi = EXT3_SB(sb);
508 : 4 : es = sbi->s_es;
509 [ + - ][ + - ]: 4 : if (block < le32_to_cpu(es->s_first_data_block) ||
510 [ - + ]: 4 : block + count < block ||
511 : 4 : block + count > le32_to_cpu(es->s_blocks_count)) {
512 : 0 : ext3_error (sb, "ext3_free_blocks",
513 : : "Freeing blocks not in datazone - "
514 : : "block = "E3FSBLK", count = %lu", block, count);
515 : 4 : goto error_return;
516 : : }
517 : :
518 : : ext3_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
519 : :
520 : : do_more:
521 : : overflow = 0;
522 : 8 : block_group = (block - le32_to_cpu(es->s_first_data_block)) /
523 : 4 : EXT3_BLOCKS_PER_GROUP(sb);
524 : 4 : bit = (block - le32_to_cpu(es->s_first_data_block)) %
525 : : EXT3_BLOCKS_PER_GROUP(sb);
526 : : /*
527 : : * Check to see if we are freeing blocks across a group
528 : : * boundary.
529 : : */
530 [ - + ]: 4 : if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
531 : 0 : overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
532 : 0 : count -= overflow;
533 : : }
534 : : brelse(bitmap_bh);
535 : 4 : bitmap_bh = read_block_bitmap(sb, block_group);
536 [ + - ]: 4 : if (!bitmap_bh)
537 : : goto error_return;
538 : 4 : desc = ext3_get_group_desc (sb, block_group, &gd_bh);
539 [ + - ]: 4 : if (!desc)
540 : : goto error_return;
541 : :
542 [ - + ][ # # ]: 4 : if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
[ - + ]
543 [ # # ][ + - ]: 4 : in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
544 [ + - ]: 4 : in_range (block, le32_to_cpu(desc->bg_inode_table),
545 [ + - ]: 4 : sbi->s_itb_per_group) ||
546 [ - + ]: 4 : in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
547 : : sbi->s_itb_per_group)) {
548 : 0 : ext3_error (sb, "ext3_free_blocks",
549 : : "Freeing blocks in system zones - "
550 : : "Block = "E3FSBLK", count = %lu",
551 : : block, count);
552 : 0 : goto error_return;
553 : : }
554 : :
555 : : /*
556 : : * We are about to start releasing blocks in the bitmap,
557 : : * so we need undo access.
558 : : */
559 : : /* @@@ check errors */
560 : : BUFFER_TRACE(bitmap_bh, "getting undo access");
561 : 4 : err = ext3_journal_get_undo_access(handle, bitmap_bh);
562 [ + - ]: 4 : if (err)
563 : : goto error_return;
564 : :
565 : : /*
566 : : * We are about to modify some metadata. Call the journal APIs
567 : : * to unshare ->b_data if a currently-committing transaction is
568 : : * using it
569 : : */
570 : : BUFFER_TRACE(gd_bh, "get_write_access");
571 : 4 : err = ext3_journal_get_write_access(handle, gd_bh);
572 [ + - ]: 4 : if (err)
573 : : goto error_return;
574 : :
575 : : jbd_lock_bh_state(bitmap_bh);
576 : :
577 [ + + ]: 11 : for (i = 0, group_freed = 0; i < count; i++) {
578 : : /*
579 : : * An HJ special. This is expensive...
580 : : */
581 : : #ifdef CONFIG_JBD_DEBUG
582 : : jbd_unlock_bh_state(bitmap_bh);
583 : : {
584 : : struct buffer_head *debug_bh;
585 : : debug_bh = sb_find_get_block(sb, block + i);
586 : : if (debug_bh) {
587 : : BUFFER_TRACE(debug_bh, "Deleted!");
588 : : if (!bh2jh(bitmap_bh)->b_committed_data)
589 : : BUFFER_TRACE(debug_bh,
590 : : "No committed data in bitmap");
591 : : BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
592 : : __brelse(debug_bh);
593 : : }
594 : : }
595 : : jbd_lock_bh_state(bitmap_bh);
596 : : #endif
597 [ - + ]: 7 : if (need_resched()) {
598 : : jbd_unlock_bh_state(bitmap_bh);
599 : 0 : cond_resched();
600 : : jbd_lock_bh_state(bitmap_bh);
601 : : }
602 : : /* @@@ This prevents newly-allocated data from being
603 : : * freed and then reallocated within the same
604 : : * transaction.
605 : : *
606 : : * Ideally we would want to allow that to happen, but to
607 : : * do so requires making journal_forget() capable of
608 : : * revoking the queued write of a data block, which
609 : : * implies blocking on the journal lock. *forget()
610 : : * cannot block due to truncate races.
611 : : *
612 : : * Eventually we can fix this by making journal_forget()
613 : : * return a status indicating whether or not it was able
614 : : * to revoke the buffer. On successful revoke, it is
615 : : * safe not to set the allocation bit in the committed
616 : : * bitmap, because we know that there is no outstanding
617 : : * activity on the buffer any more and so it is safe to
618 : : * reallocate it.
619 : : */
620 : : BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
621 [ - + ]: 7 : J_ASSERT_BH(bitmap_bh,
622 : : bh2jh(bitmap_bh)->b_committed_data != NULL);
623 : 7 : ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
624 : : bh2jh(bitmap_bh)->b_committed_data);
625 : :
626 : : /*
627 : : * We clear the bit in the bitmap after setting the committed
628 : : * data bit, because this is the reverse order to that which
629 : : * the allocator uses.
630 : : */
631 : : BUFFER_TRACE(bitmap_bh, "clear bit");
632 [ - + ]: 7 : if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
633 : : bit + i, bitmap_bh->b_data)) {
634 : : jbd_unlock_bh_state(bitmap_bh);
635 : 0 : ext3_error(sb, __func__,
636 : : "bit already cleared for block "E3FSBLK,
637 : : block + i);
638 : : jbd_lock_bh_state(bitmap_bh);
639 : : BUFFER_TRACE(bitmap_bh, "bit already cleared");
640 : : } else {
641 : 7 : group_freed++;
642 : : }
643 : : }
644 : : jbd_unlock_bh_state(bitmap_bh);
645 : :
646 : 4 : spin_lock(sb_bgl_lock(sbi, block_group));
647 : 4 : le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
648 : 4 : spin_unlock(sb_bgl_lock(sbi, block_group));
649 : 4 : percpu_counter_add(&sbi->s_freeblocks_counter, count);
650 : :
651 : : /* We dirtied the bitmap block */
652 : : BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
653 : 4 : err = ext3_journal_dirty_metadata(handle, bitmap_bh);
654 : :
655 : : /* And the group descriptor block */
656 : : BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
657 : 4 : ret = ext3_journal_dirty_metadata(handle, gd_bh);
658 [ + - ]: 4 : if (!err) err = ret;
659 : 4 : *pdquot_freed_blocks += group_freed;
660 : :
661 [ - + ]: 4 : if (overflow && !err) {
662 : : block += count;
663 : : count = overflow;
664 : : goto do_more;
665 : : }
666 : :
667 : : error_return:
668 : : brelse(bitmap_bh);
669 [ - + ]: 4 : ext3_std_error(sb, err);
670 : 4 : return;
671 : : }
672 : :
673 : : /**
674 : : * ext3_free_blocks() -- Free given blocks and update quota
675 : : * @handle: handle for this transaction
676 : : * @inode: inode
677 : : * @block: start physical block to free
678 : : * @count: number of blocks to count
679 : : */
680 : 0 : void ext3_free_blocks(handle_t *handle, struct inode *inode,
681 : : ext3_fsblk_t block, unsigned long count)
682 : : {
683 : 4 : struct super_block *sb = inode->i_sb;
684 : : unsigned long dquot_freed_blocks;
685 : :
686 : : trace_ext3_free_blocks(inode, block, count);
687 : 4 : ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
688 [ + - ]: 4 : if (dquot_freed_blocks)
689 : 4 : dquot_free_block(inode, dquot_freed_blocks);
690 : 4 : return;
691 : : }
692 : :
693 : : /**
694 : : * ext3_test_allocatable()
695 : : * @nr: given allocation block group
696 : : * @bh: bufferhead contains the bitmap of the given block group
697 : : *
698 : : * For ext3 allocations, we must not reuse any blocks which are
699 : : * allocated in the bitmap buffer's "last committed data" copy. This
700 : : * prevents deletes from freeing up the page for reuse until we have
701 : : * committed the delete transaction.
702 : : *
703 : : * If we didn't do this, then deleting something and reallocating it as
704 : : * data would allow the old block to be overwritten before the
705 : : * transaction committed (because we force data to disk before commit).
706 : : * This would lead to corruption if we crashed between overwriting the
707 : : * data and committing the delete.
708 : : *
709 : : * @@@ We may want to make this allocation behaviour conditional on
710 : : * data-writes at some point, and disable it for metadata allocations or
711 : : * sync-data inodes.
712 : : */
713 : 0 : static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh)
714 : : {
715 : : int ret;
716 : : struct journal_head *jh = bh2jh(bh);
717 : :
718 [ + + ]: 17 : if (ext3_test_bit(nr, bh->b_data))
719 : : return 0;
720 : :
721 : : jbd_lock_bh_state(bh);
722 [ + - ]: 15 : if (!jh->b_committed_data)
723 : : ret = 1;
724 : : else
725 : 15 : ret = !ext3_test_bit(nr, jh->b_committed_data);
726 : : jbd_unlock_bh_state(bh);
727 : 15 : return ret;
728 : : }
729 : :
730 : : /**
731 : : * bitmap_search_next_usable_block()
732 : : * @start: the starting block (group relative) of the search
733 : : * @bh: bufferhead contains the block group bitmap
734 : : * @maxblocks: the ending block (group relative) of the reservation
735 : : *
736 : : * The bitmap search --- search forward alternately through the actual
737 : : * bitmap on disk and the last-committed copy in journal, until we find a
738 : : * bit free in both bitmaps.
739 : : */
740 : : static ext3_grpblk_t
741 : 0 : bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
742 : : ext3_grpblk_t maxblocks)
743 : : {
744 : : ext3_grpblk_t next;
745 : : struct journal_head *jh = bh2jh(bh);
746 : :
747 [ + - ]: 6 : while (start < maxblocks) {
748 : 6 : next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start);
749 [ + - ]: 6 : if (next >= maxblocks)
750 : : return -1;
751 [ - + ]: 6 : if (ext3_test_allocatable(next, bh))
752 : : return next;
753 : : jbd_lock_bh_state(bh);
754 [ # # ]: 0 : if (jh->b_committed_data)
755 : : start = ext3_find_next_zero_bit(jh->b_committed_data,
756 : : maxblocks, next);
757 : : jbd_unlock_bh_state(bh);
758 : : }
759 : : return -1;
760 : : }
761 : :
762 : : /**
763 : : * find_next_usable_block()
764 : : * @start: the starting block (group relative) to find next
765 : : * allocatable block in bitmap.
766 : : * @bh: bufferhead contains the block group bitmap
767 : : * @maxblocks: the ending block (group relative) for the search
768 : : *
769 : : * Find an allocatable block in a bitmap. We honor both the bitmap and
770 : : * its last-committed copy (if that exists), and perform the "most
771 : : * appropriate allocation" algorithm of looking for a free block near
772 : : * the initial goal; then for a free byte somewhere in the bitmap; then
773 : : * for any free bit in the bitmap.
774 : : */
775 : : static ext3_grpblk_t
776 : 0 : find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
777 : : ext3_grpblk_t maxblocks)
778 : : {
779 : : ext3_grpblk_t here, next;
780 : : char *p, *r;
781 : :
782 [ + + ]: 4 : if (start > 0) {
783 : : /*
784 : : * The goal was occupied; search forward for a free
785 : : * block within the next XX blocks.
786 : : *
787 : : * end_goal is more or less random, but it has to be
788 : : * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the
789 : : * next 64-bit boundary is simple..
790 : : */
791 : 3 : ext3_grpblk_t end_goal = (start + 63) & ~63;
792 [ + - ]: 3 : if (end_goal > maxblocks)
793 : : end_goal = maxblocks;
794 : 3 : here = ext3_find_next_zero_bit(bh->b_data, end_goal, start);
795 [ + - ][ - + ]: 3 : if (here < end_goal && ext3_test_allocatable(here, bh))
796 : : return here;
797 : : ext3_debug("Bit not found near goal\n");
798 : : }
799 : :
800 : : here = start;
801 [ - + ]: 1 : if (here < 0)
802 : : here = 0;
803 : :
804 : 1 : p = bh->b_data + (here >> 3);
805 : 1 : r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
806 : 1 : next = (r - bh->b_data) << 3;
807 : :
808 [ + - ][ - + ]: 1 : if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh))
809 : : return next;
810 : :
811 : : /*
812 : : * The bitmap search --- search forward alternately through the actual
813 : : * bitmap and the last-committed copy until we find a bit free in
814 : : * both
815 : : */
816 : 0 : here = bitmap_search_next_usable_block(here, bh, maxblocks);
817 : 0 : return here;
818 : : }
819 : :
820 : : /**
821 : : * claim_block()
822 : : * @lock: the spin lock for this block group
823 : : * @block: the free block (group relative) to allocate
824 : : * @bh: the buffer_head contains the block group bitmap
825 : : *
826 : : * We think we can allocate this block in this bitmap. Try to set the bit.
827 : : * If that succeeds then check that nobody has allocated and then freed the
828 : : * block since we saw that is was not marked in b_committed_data. If it _was_
829 : : * allocated and freed then clear the bit in the bitmap again and return
830 : : * zero (failure).
831 : : */
832 : : static inline int
833 : 7 : claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh)
834 : : {
835 : : struct journal_head *jh = bh2jh(bh);
836 : : int ret;
837 : :
838 [ + - # # : 7 : if (ext3_set_bit_atomic(lock, block, bh->b_data))
# # ]
839 : : return 0;
840 : : jbd_lock_bh_state(bh);
841 [ + - ][ - + ]: 7 : if (jh->b_committed_data && ext3_test_bit(block,jh->b_committed_data)) {
[ # # ][ # # ]
[ # # ][ # # ]
842 : 0 : ext3_clear_bit_atomic(lock, block, bh->b_data);
843 : : ret = 0;
844 : : } else {
845 : : ret = 1;
846 : : }
847 : : jbd_unlock_bh_state(bh);
848 : : return ret;
849 : : }
850 : :
851 : : /**
852 : : * ext3_try_to_allocate()
853 : : * @sb: superblock
854 : : * @handle: handle to this transaction
855 : : * @group: given allocation block group
856 : : * @bitmap_bh: bufferhead holds the block bitmap
857 : : * @grp_goal: given target block within the group
858 : : * @count: target number of blocks to allocate
859 : : * @my_rsv: reservation window
860 : : *
861 : : * Attempt to allocate blocks within a give range. Set the range of allocation
862 : : * first, then find the first free bit(s) from the bitmap (within the range),
863 : : * and at last, allocate the blocks by claiming the found free bit as allocated.
864 : : *
865 : : * To set the range of this allocation:
866 : : * if there is a reservation window, only try to allocate block(s) from the
867 : : * file's own reservation window;
868 : : * Otherwise, the allocation range starts from the give goal block, ends at
869 : : * the block group's last block.
870 : : *
871 : : * If we failed to allocate the desired block then we may end up crossing to a
872 : : * new bitmap. In that case we must release write access to the old one via
873 : : * ext3_journal_release_buffer(), else we'll run out of credits.
874 : : */
875 : : static ext3_grpblk_t
876 : 0 : ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
877 : : struct buffer_head *bitmap_bh, ext3_grpblk_t grp_goal,
878 : : unsigned long *count, struct ext3_reserve_window *my_rsv)
879 : : {
880 : : ext3_fsblk_t group_first_block;
881 : : ext3_grpblk_t start, end;
882 : : unsigned long num = 0;
883 : :
884 : : /* we do allocation within the reservation window if we have a window */
885 [ + + ]: 7 : if (my_rsv) {
886 : 6 : group_first_block = ext3_group_first_block_no(sb, group);
887 [ + - ]: 6 : if (my_rsv->_rsv_start >= group_first_block)
888 : 6 : start = my_rsv->_rsv_start - group_first_block;
889 : : else
890 : : /* reservation window cross group boundary */
891 : : start = 0;
892 : 6 : end = my_rsv->_rsv_end - group_first_block + 1;
893 [ - + ]: 6 : if (end > EXT3_BLOCKS_PER_GROUP(sb))
894 : : /* reservation window crosses group boundary */
895 : 0 : end = EXT3_BLOCKS_PER_GROUP(sb);
896 [ + + ]: 6 : if ((start <= grp_goal) && (grp_goal < end))
897 : : start = grp_goal;
898 : : else
899 : : grp_goal = -1;
900 : : } else {
901 [ - + ]: 1 : if (grp_goal > 0)
902 : : start = grp_goal;
903 : : else
904 : : start = 0;
905 : 1 : end = EXT3_BLOCKS_PER_GROUP(sb);
906 : : }
907 : :
908 [ - + ]: 7 : BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb));
909 : :
910 : : repeat:
911 [ + + ][ + + ]: 7 : if (grp_goal < 0 || !ext3_test_allocatable(grp_goal, bitmap_bh)) {
912 : 4 : grp_goal = find_next_usable_block(start, bitmap_bh, end);
913 [ + - ]: 4 : if (grp_goal < 0)
914 : : goto fail_access;
915 [ + + ]: 4 : if (!my_rsv) {
916 : : int i;
917 : :
918 [ + - + + ]: 6 : for (i = 0; i < 7 && grp_goal > start &&
919 : 3 : ext3_test_allocatable(grp_goal - 1,
920 : : bitmap_bh);
921 : 2 : i++, grp_goal--)
922 : : ;
923 : : }
924 : : }
925 : : start = grp_goal;
926 : :
927 [ - + ]: 7 : if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group),
928 : : grp_goal, bitmap_bh)) {
929 : : /*
930 : : * The block was allocated by another thread, or it was
931 : : * allocated and then freed by another thread
932 : : */
933 : 0 : start++;
934 : : grp_goal++;
935 [ # # ]: 0 : if (start >= end)
936 : : goto fail_access;
937 : : goto repeat;
938 : : }
939 : : num++;
940 : 7 : grp_goal++;
941 [ - + ][ # # ]: 7 : while (num < *count && grp_goal < end
942 [ # # ]: 0 : && ext3_test_allocatable(grp_goal, bitmap_bh)
943 [ # # ]: 0 : && claim_block(sb_bgl_lock(EXT3_SB(sb), group),
944 : : grp_goal, bitmap_bh)) {
945 : 0 : num++;
946 : 0 : grp_goal++;
947 : : }
948 : 7 : *count = num;
949 : 7 : return grp_goal - num;
950 : : fail_access:
951 : 0 : *count = num;
952 : : return -1;
953 : : }
954 : :
955 : : /**
956 : : * find_next_reservable_window():
957 : : * find a reservable space within the given range.
958 : : * It does not allocate the reservation window for now:
959 : : * alloc_new_reservation() will do the work later.
960 : : *
961 : : * @search_head: the head of the searching list;
962 : : * This is not necessarily the list head of the whole filesystem
963 : : *
964 : : * We have both head and start_block to assist the search
965 : : * for the reservable space. The list starts from head,
966 : : * but we will shift to the place where start_block is,
967 : : * then start from there, when looking for a reservable space.
968 : : *
969 : : * @my_rsv: the reservation window
970 : : *
971 : : * @sb: the super block
972 : : *
973 : : * @start_block: the first block we consider to start
974 : : * the real search from
975 : : *
976 : : * @last_block:
977 : : * the maximum block number that our goal reservable space
978 : : * could start from. This is normally the last block in this
979 : : * group. The search will end when we found the start of next
980 : : * possible reservable space is out of this boundary.
981 : : * This could handle the cross boundary reservation window
982 : : * request.
983 : : *
984 : : * basically we search from the given range, rather than the whole
985 : : * reservation double linked list, (start_block, last_block)
986 : : * to find a free region that is of my size and has not
987 : : * been reserved.
988 : : *
989 : : */
990 : 0 : static int find_next_reservable_window(
991 : : struct ext3_reserve_window_node *search_head,
992 : : struct ext3_reserve_window_node *my_rsv,
993 : : struct super_block * sb,
994 : : ext3_fsblk_t start_block,
995 : : ext3_fsblk_t last_block)
996 : : {
997 : : struct rb_node *next;
998 : : struct ext3_reserve_window_node *rsv, *prev;
999 : : ext3_fsblk_t cur;
1000 : 6 : int size = my_rsv->rsv_goal_size;
1001 : :
1002 : : /* TODO: make the start of the reservation window byte-aligned */
1003 : : /* cur = *start_block & ~7;*/
1004 : : cur = start_block;
1005 : : rsv = search_head;
1006 [ + - ]: 6 : if (!rsv)
1007 : : return -1;
1008 : :
1009 : : while (1) {
1010 [ - + ]: 6 : if (cur <= rsv->rsv_end)
1011 : 0 : cur = rsv->rsv_end + 1;
1012 : :
1013 : : /* TODO?
1014 : : * in the case we could not find a reservable space
1015 : : * that is what is expected, during the re-search, we could
1016 : : * remember what's the largest reservable space we could have
1017 : : * and return that one.
1018 : : *
1019 : : * For now it will fail if we could not find the reservable
1020 : : * space with expected-size (or more)...
1021 : : */
1022 [ + - ]: 6 : if (cur > last_block)
1023 : : return -1; /* fail */
1024 : :
1025 : : prev = rsv;
1026 : 6 : next = rb_next(&rsv->rsv_node);
1027 : : rsv = rb_entry(next,struct ext3_reserve_window_node,rsv_node);
1028 : :
1029 : : /*
1030 : : * Reached the last reservation, we can just append to the
1031 : : * previous one.
1032 : : */
1033 [ - + ]: 6 : if (!next)
1034 : : break;
1035 : :
1036 [ # # ]: 0 : if (cur + size <= rsv->rsv_start) {
1037 : : /*
1038 : : * Found a reserveable space big enough. We could
1039 : : * have a reservation across the group boundary here
1040 : : */
1041 : : break;
1042 : : }
1043 : : }
1044 : : /*
1045 : : * we come here either :
1046 : : * when we reach the end of the whole list,
1047 : : * and there is empty reservable space after last entry in the list.
1048 : : * append it to the end of the list.
1049 : : *
1050 : : * or we found one reservable space in the middle of the list,
1051 : : * return the reservation window that we could append to.
1052 : : * succeed.
1053 : : */
1054 : :
1055 [ + + ][ - + ]: 6 : if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
1056 : : rsv_window_remove(sb, my_rsv);
1057 : :
1058 : : /*
1059 : : * Let's book the whole available window for now. We will check the
1060 : : * disk bitmap later and then, if there are free blocks then we adjust
1061 : : * the window size if it's larger than requested.
1062 : : * Otherwise, we will remove this node from the tree next time
1063 : : * call find_next_reservable_window.
1064 : : */
1065 : 6 : my_rsv->rsv_start = cur;
1066 : 6 : my_rsv->rsv_end = cur + size - 1;
1067 : 6 : my_rsv->rsv_alloc_hit = 0;
1068 : :
1069 [ + + ]: 6 : if (prev != my_rsv)
1070 : 3 : ext3_rsv_window_add(sb, my_rsv);
1071 : :
1072 : : return 0;
1073 : : }
1074 : :
1075 : : /**
1076 : : * alloc_new_reservation()--allocate a new reservation window
1077 : : *
1078 : : * To make a new reservation, we search part of the filesystem
1079 : : * reservation list (the list that inside the group). We try to
1080 : : * allocate a new reservation window near the allocation goal,
1081 : : * or the beginning of the group, if there is no goal.
1082 : : *
1083 : : * We first find a reservable space after the goal, then from
1084 : : * there, we check the bitmap for the first free block after
1085 : : * it. If there is no free block until the end of group, then the
1086 : : * whole group is full, we failed. Otherwise, check if the free
1087 : : * block is inside the expected reservable space, if so, we
1088 : : * succeed.
1089 : : * If the first free block is outside the reservable space, then
1090 : : * start from the first free block, we search for next available
1091 : : * space, and go on.
1092 : : *
1093 : : * on succeed, a new reservation will be found and inserted into the list
1094 : : * It contains at least one free block, and it does not overlap with other
1095 : : * reservation windows.
1096 : : *
1097 : : * failed: we failed to find a reservation window in this group
1098 : : *
1099 : : * @my_rsv: the reservation window
1100 : : *
1101 : : * @grp_goal: The goal (group-relative). It is where the search for a
1102 : : * free reservable space should start from.
1103 : : * if we have a grp_goal(grp_goal >0 ), then start from there,
1104 : : * no grp_goal(grp_goal = -1), we start from the first block
1105 : : * of the group.
1106 : : *
1107 : : * @sb: the super block
1108 : : * @group: the group we are trying to allocate in
1109 : : * @bitmap_bh: the block group block bitmap
1110 : : *
1111 : : */
1112 : 0 : static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
1113 : 3 : ext3_grpblk_t grp_goal, struct super_block *sb,
1114 : : unsigned int group, struct buffer_head *bitmap_bh)
1115 : : {
1116 : : struct ext3_reserve_window_node *search_head;
1117 : : ext3_fsblk_t group_first_block, group_end_block, start_block;
1118 : : ext3_grpblk_t first_free_block;
1119 : 3 : struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root;
1120 : : unsigned long size;
1121 : : int ret;
1122 : : spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
1123 : :
1124 : : group_first_block = ext3_group_first_block_no(sb, group);
1125 : 3 : group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
1126 : :
1127 [ + - ]: 3 : if (grp_goal < 0)
1128 : : start_block = group_first_block;
1129 : : else
1130 : 3 : start_block = grp_goal + group_first_block;
1131 : :
1132 : : trace_ext3_alloc_new_reservation(sb, start_block);
1133 : 3 : size = my_rsv->rsv_goal_size;
1134 : :
1135 [ - + ]: 3 : if (!rsv_is_empty(&my_rsv->rsv_window)) {
1136 : : /*
1137 : : * if the old reservation is cross group boundary
1138 : : * and if the goal is inside the old reservation window,
1139 : : * we will come here when we just failed to allocate from
1140 : : * the first part of the window. We still have another part
1141 : : * that belongs to the next group. In this case, there is no
1142 : : * point to discard our window and try to allocate a new one
1143 : : * in this group(which will fail). we should
1144 : : * keep the reservation window, just simply move on.
1145 : : *
1146 : : * Maybe we could shift the start block of the reservation
1147 : : * window to the first block of next group.
1148 : : */
1149 : :
1150 [ # # ][ # # ]: 0 : if ((my_rsv->rsv_start <= group_end_block) &&
1151 [ # # ]: 0 : (my_rsv->rsv_end > group_end_block) &&
1152 : : (start_block >= my_rsv->rsv_start))
1153 : : return -1;
1154 : :
1155 [ # # ]: 0 : if ((my_rsv->rsv_alloc_hit >
1156 : 0 : (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
1157 : : /*
1158 : : * if the previously allocation hit ratio is
1159 : : * greater than 1/2, then we double the size of
1160 : : * the reservation window the next time,
1161 : : * otherwise we keep the same size window
1162 : : */
1163 : 0 : size = size * 2;
1164 [ # # ]: 0 : if (size > EXT3_MAX_RESERVE_BLOCKS)
1165 : : size = EXT3_MAX_RESERVE_BLOCKS;
1166 : 0 : my_rsv->rsv_goal_size= size;
1167 : : }
1168 : : }
1169 : :
1170 : : spin_lock(rsv_lock);
1171 : : /*
1172 : : * shift the search start to the window near the goal block
1173 : : */
1174 : 3 : search_head = search_reserve_window(fs_rsv_root, start_block);
1175 : :
1176 : : /*
1177 : : * find_next_reservable_window() simply finds a reservable window
1178 : : * inside the given range(start_block, group_end_block).
1179 : : *
1180 : : * To make sure the reservation window has a free bit inside it, we
1181 : : * need to check the bitmap after we found a reservable window.
1182 : : */
1183 : : retry:
1184 : 6 : ret = find_next_reservable_window(search_head, my_rsv, sb,
1185 : : start_block, group_end_block);
1186 : :
1187 [ - + ]: 6 : if (ret == -1) {
1188 [ # # ]: 0 : if (!rsv_is_empty(&my_rsv->rsv_window))
1189 : : rsv_window_remove(sb, my_rsv);
1190 : : spin_unlock(rsv_lock);
1191 : 0 : return -1;
1192 : : }
1193 : :
1194 : : /*
1195 : : * On success, find_next_reservable_window() returns the
1196 : : * reservation window where there is a reservable space after it.
1197 : : * Before we reserve this reservable space, we need
1198 : : * to make sure there is at least a free block inside this region.
1199 : : *
1200 : : * searching the first free bit on the block bitmap and copy of
1201 : : * last committed bitmap alternatively, until we found a allocatable
1202 : : * block. Search start from the start block of the reservable space
1203 : : * we just found.
1204 : : */
1205 : : spin_unlock(rsv_lock);
1206 : 6 : first_free_block = bitmap_search_next_usable_block(
1207 : 6 : my_rsv->rsv_start - group_first_block,
1208 : 6 : bitmap_bh, group_end_block - group_first_block + 1);
1209 : :
1210 [ - + ]: 6 : if (first_free_block < 0) {
1211 : : /*
1212 : : * no free block left on the bitmap, no point
1213 : : * to reserve the space. return failed.
1214 : : */
1215 : : spin_lock(rsv_lock);
1216 [ # # ]: 0 : if (!rsv_is_empty(&my_rsv->rsv_window))
1217 : : rsv_window_remove(sb, my_rsv);
1218 : : spin_unlock(rsv_lock);
1219 : 0 : return -1; /* failed */
1220 : : }
1221 : :
1222 : 6 : start_block = first_free_block + group_first_block;
1223 : : /*
1224 : : * check if the first free block is within the
1225 : : * free space we just reserved
1226 : : */
1227 [ + - ][ + + ]: 6 : if (start_block >= my_rsv->rsv_start &&
1228 : 6 : start_block <= my_rsv->rsv_end) {
1229 : : trace_ext3_reserved(sb, start_block, my_rsv);
1230 : : return 0; /* success */
1231 : : }
1232 : : /*
1233 : : * if the first free bit we found is out of the reservable space
1234 : : * continue search for next reservable space,
1235 : : * start from where the free block is,
1236 : : * we also shift the list head to where we stopped last time
1237 : : */
1238 : : search_head = my_rsv;
1239 : : spin_lock(rsv_lock);
1240 : : goto retry;
1241 : : }
1242 : :
1243 : : /**
1244 : : * try_to_extend_reservation()
1245 : : * @my_rsv: given reservation window
1246 : : * @sb: super block
1247 : : * @size: the delta to extend
1248 : : *
1249 : : * Attempt to expand the reservation window large enough to have
1250 : : * required number of free blocks
1251 : : *
1252 : : * Since ext3_try_to_allocate() will always allocate blocks within
1253 : : * the reservation window range, if the window size is too small,
1254 : : * multiple blocks allocation has to stop at the end of the reservation
1255 : : * window. To make this more efficient, given the total number of
1256 : : * blocks needed and the current size of the window, we try to
1257 : : * expand the reservation window size if necessary on a best-effort
1258 : : * basis before ext3_new_blocks() tries to allocate blocks,
1259 : : */
1260 : 0 : static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
1261 : 0 : struct super_block *sb, int size)
1262 : : {
1263 : : struct ext3_reserve_window_node *next_rsv;
1264 : : struct rb_node *next;
1265 : : spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
1266 : :
1267 [ # # ]: 0 : if (!spin_trylock(rsv_lock))
1268 : 0 : return;
1269 : :
1270 : 0 : next = rb_next(&my_rsv->rsv_node);
1271 : :
1272 [ # # ]: 0 : if (!next)
1273 : 0 : my_rsv->rsv_end += size;
1274 : : else {
1275 : : next_rsv = rb_entry(next, struct ext3_reserve_window_node, rsv_node);
1276 : :
1277 [ # # ]: 0 : if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
1278 : 0 : my_rsv->rsv_end += size;
1279 : : else
1280 : 0 : my_rsv->rsv_end = next_rsv->rsv_start - 1;
1281 : : }
1282 : : spin_unlock(rsv_lock);
1283 : : }
1284 : :
1285 : : /**
1286 : : * ext3_try_to_allocate_with_rsv()
1287 : : * @sb: superblock
1288 : : * @handle: handle to this transaction
1289 : : * @group: given allocation block group
1290 : : * @bitmap_bh: bufferhead holds the block bitmap
1291 : : * @grp_goal: given target block within the group
1292 : : * @my_rsv: reservation window
1293 : : * @count: target number of blocks to allocate
1294 : : * @errp: pointer to store the error code
1295 : : *
1296 : : * This is the main function used to allocate a new block and its reservation
1297 : : * window.
1298 : : *
1299 : : * Each time when a new block allocation is need, first try to allocate from
1300 : : * its own reservation. If it does not have a reservation window, instead of
1301 : : * looking for a free bit on bitmap first, then look up the reservation list to
1302 : : * see if it is inside somebody else's reservation window, we try to allocate a
1303 : : * reservation window for it starting from the goal first. Then do the block
1304 : : * allocation within the reservation window.
1305 : : *
1306 : : * This will avoid keeping on searching the reservation list again and
1307 : : * again when somebody is looking for a free block (without
1308 : : * reservation), and there are lots of free blocks, but they are all
1309 : : * being reserved.
1310 : : *
1311 : : * We use a red-black tree for the per-filesystem reservation list.
1312 : : *
1313 : : */
1314 : : static ext3_grpblk_t
1315 : 0 : ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1316 : : unsigned int group, struct buffer_head *bitmap_bh,
1317 : : ext3_grpblk_t grp_goal,
1318 : : struct ext3_reserve_window_node * my_rsv,
1319 : : unsigned long *count, int *errp)
1320 : : {
1321 : : ext3_fsblk_t group_first_block, group_last_block;
1322 : : ext3_grpblk_t ret = 0;
1323 : : int fatal;
1324 : 7 : unsigned long num = *count;
1325 : :
1326 : 7 : *errp = 0;
1327 : :
1328 : : /*
1329 : : * Make sure we use undo access for the bitmap, because it is critical
1330 : : * that we do the frozen_data COW on bitmap buffers in all cases even
1331 : : * if the buffer is in BJ_Forget state in the committing transaction.
1332 : : */
1333 : : BUFFER_TRACE(bitmap_bh, "get undo access for new block");
1334 : 7 : fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
1335 [ - + ]: 7 : if (fatal) {
1336 : 0 : *errp = fatal;
1337 : 0 : return -1;
1338 : : }
1339 : :
1340 : : /*
1341 : : * we don't deal with reservation when
1342 : : * filesystem is mounted without reservation
1343 : : * or the file is not a regular file
1344 : : * or last attempt to allocate a block with reservation turned on failed
1345 : : */
1346 [ + + ]: 7 : if (my_rsv == NULL ) {
1347 : 1 : ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
1348 : : grp_goal, count, NULL);
1349 : 1 : goto out;
1350 : : }
1351 : : /*
1352 : : * grp_goal is a group relative block number (if there is a goal)
1353 : : * 0 <= grp_goal < EXT3_BLOCKS_PER_GROUP(sb)
1354 : : * first block is a filesystem wide block number
1355 : : * first block is the block number of the first block in this group
1356 : : */
1357 : : group_first_block = ext3_group_first_block_no(sb, group);
1358 : 6 : group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
1359 : :
1360 : : /*
1361 : : * Basically we will allocate a new block from inode's reservation
1362 : : * window.
1363 : : *
1364 : : * We need to allocate a new reservation window, if:
1365 : : * a) inode does not have a reservation window; or
1366 : : * b) last attempt to allocate a block from existing reservation
1367 : : * failed; or
1368 : : * c) we come here with a goal and with a reservation window
1369 : : *
1370 : : * We do not need to allocate a new reservation window if we come here
1371 : : * at the beginning with a goal and the goal is inside the window, or
1372 : : * we don't have a goal but already have a reservation window.
1373 : : * then we could go to allocate from the reservation window directly.
1374 : : */
1375 : : while (1) {
1376 [ + + ]: 9 : if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
[ + - - + ]
1377 : 3 : !goal_in_my_reservation(&my_rsv->rsv_window,
1378 : : grp_goal, group, sb)) {
1379 [ - + ]: 3 : if (my_rsv->rsv_goal_size < *count)
1380 : 0 : my_rsv->rsv_goal_size = *count;
1381 : 3 : ret = alloc_new_reservation(my_rsv, grp_goal, sb,
1382 : : group, bitmap_bh);
1383 [ + - ]: 3 : if (ret < 0)
1384 : : break; /* failed */
1385 : :
1386 [ + - ]: 3 : if (!goal_in_my_reservation(&my_rsv->rsv_window,
1387 : : grp_goal, group, sb))
1388 : : grp_goal = -1;
1389 [ + - ]: 3 : } else if (grp_goal >= 0) {
1390 : 3 : int curr = my_rsv->rsv_end -
1391 : 6 : (grp_goal + group_first_block) + 1;
1392 : :
1393 [ - + ]: 3 : if (curr < *count)
1394 : 0 : try_to_extend_reservation(my_rsv, sb,
1395 : 0 : *count - curr);
1396 : : }
1397 : :
1398 [ + - ][ - + ]: 6 : if ((my_rsv->rsv_start > group_last_block) ||
1399 : 6 : (my_rsv->rsv_end < group_first_block)) {
1400 : 0 : rsv_window_dump(&EXT3_SB(sb)->s_rsv_window_root, 1);
1401 : 0 : BUG();
1402 : : }
1403 : 6 : ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
1404 : : grp_goal, &num, &my_rsv->rsv_window);
1405 [ + - ]: 6 : if (ret >= 0) {
1406 : 6 : my_rsv->rsv_alloc_hit += num;
1407 : 6 : *count = num;
1408 : 6 : break; /* succeed */
1409 : : }
1410 : 0 : num = *count;
1411 : 0 : }
1412 : : out:
1413 [ + - ]: 7 : if (ret >= 0) {
1414 : : BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
1415 : : "bitmap block");
1416 : 7 : fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
1417 [ - + ]: 7 : if (fatal) {
1418 : 0 : *errp = fatal;
1419 : 0 : return -1;
1420 : : }
1421 : : return ret;
1422 : : }
1423 : :
1424 : : BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
1425 : : ext3_journal_release_buffer(handle, bitmap_bh);
1426 : 0 : return ret;
1427 : : }
1428 : :
1429 : : /**
1430 : : * ext3_has_free_blocks()
1431 : : * @sbi: in-core super block structure.
1432 : : *
1433 : : * Check if filesystem has at least 1 free block available for allocation.
1434 : : */
1435 : 0 : static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation)
1436 : : {
1437 : : ext3_fsblk_t free_blocks, root_blocks;
1438 : :
1439 : 14 : free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1440 : 7 : root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
1441 [ - + ][ # # ]: 7 : if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
[ # # ]
1442 [ # # ][ # # ]: 0 : !use_reservation && !uid_eq(sbi->s_resuid, current_fsuid()) &&
1443 [ # # ]: 0 : (gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) ||
1444 : 0 : !in_group_p (sbi->s_resgid))) {
1445 : : return 0;
1446 : : }
1447 : : return 1;
1448 : : }
1449 : :
1450 : : /**
1451 : : * ext3_should_retry_alloc()
1452 : : * @sb: super block
1453 : : * @retries number of attemps has been made
1454 : : *
1455 : : * ext3_should_retry_alloc() is called when ENOSPC is returned, and if
1456 : : * it is profitable to retry the operation, this function will wait
1457 : : * for the current or committing transaction to complete, and then
1458 : : * return TRUE.
1459 : : *
1460 : : * if the total number of retries exceed three times, return FALSE.
1461 : : */
1462 : 0 : int ext3_should_retry_alloc(struct super_block *sb, int *retries)
1463 : : {
1464 [ # # ][ # # ]: 0 : if (!ext3_has_free_blocks(EXT3_SB(sb), 0) || (*retries)++ > 3)
1465 : : return 0;
1466 : :
1467 : : jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
1468 : :
1469 : 0 : return journal_force_commit_nested(EXT3_SB(sb)->s_journal);
1470 : : }
1471 : :
1472 : : /**
1473 : : * ext3_new_blocks() -- core block(s) allocation function
1474 : : * @handle: handle to this transaction
1475 : : * @inode: file inode
1476 : : * @goal: given target block(filesystem wide)
1477 : : * @count: target number of blocks to allocate
1478 : : * @errp: error code
1479 : : *
1480 : : * ext3_new_blocks uses a goal block to assist allocation. It tries to
1481 : : * allocate block(s) from the block group contains the goal block first. If that
1482 : : * fails, it will try to allocate block(s) from other block groups without
1483 : : * any specific goal block.
1484 : : *
1485 : : */
1486 : 0 : ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1487 : : ext3_fsblk_t goal, unsigned long *count, int *errp)
1488 : : {
1489 : : struct buffer_head *bitmap_bh = NULL;
1490 : : struct buffer_head *gdp_bh;
1491 : : int group_no;
1492 : : int goal_group;
1493 : : ext3_grpblk_t grp_target_blk; /* blockgroup relative goal block */
1494 : : ext3_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/
1495 : : ext3_fsblk_t ret_block; /* filesyetem-wide allocated block */
1496 : : int bgi; /* blockgroup iteration index */
1497 : 7 : int fatal = 0, err;
1498 : : int performed_allocation = 0;
1499 : : ext3_grpblk_t free_blocks; /* number of free blocks in a group */
1500 : 21 : struct super_block *sb;
1501 : : struct ext3_group_desc *gdp;
1502 : : struct ext3_super_block *es;
1503 : 14 : struct ext3_sb_info *sbi;
1504 : : struct ext3_reserve_window_node *my_rsv = NULL;
1505 : : struct ext3_block_alloc_info *block_i;
1506 : : unsigned short windowsz = 0;
1507 : : #ifdef EXT3FS_DEBUG
1508 : : static int goal_hits, goal_attempts;
1509 : : #endif
1510 : : unsigned long ngroups;
1511 : 7 : unsigned long num = *count;
1512 : :
1513 : 7 : *errp = -ENOSPC;
1514 : 7 : sb = inode->i_sb;
1515 : :
1516 : : /*
1517 : : * Check quota for allocation of this block.
1518 : : */
1519 : 7 : err = dquot_alloc_block(inode, num);
1520 [ - + ]: 7 : if (err) {
1521 : 0 : *errp = err;
1522 : 0 : return 0;
1523 : : }
1524 : :
1525 : 7 : trace_ext3_request_blocks(inode, goal, num);
1526 : :
1527 : : sbi = EXT3_SB(sb);
1528 : 7 : es = sbi->s_es;
1529 : : ext3_debug("goal=%lu.\n", goal);
1530 : : /*
1531 : : * Allocate a block from reservation only when
1532 : : * filesystem is mounted with reservation(default,-o reservation), and
1533 : : * it's a regular file, and
1534 : : * the desired window size is greater than 0 (One could use ioctl
1535 : : * command EXT3_IOC_SETRSVSZ to set the window size to 0 to turn off
1536 : : * reservation on that particular file)
1537 : : */
1538 : 7 : block_i = EXT3_I(inode)->i_block_alloc_info;
1539 [ + + ][ + - ]: 7 : if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
1540 : 6 : my_rsv = &block_i->rsv_window_node;
1541 : :
1542 [ - + ]: 7 : if (!ext3_has_free_blocks(sbi, IS_NOQUOTA(inode))) {
1543 : 0 : *errp = -ENOSPC;
1544 : 0 : goto out;
1545 : : }
1546 : :
1547 : : /*
1548 : : * First, test whether the goal block is free.
1549 : : */
1550 [ + - ][ + + ]: 7 : if (goal < le32_to_cpu(es->s_first_data_block) ||
1551 : 7 : goal >= le32_to_cpu(es->s_blocks_count))
1552 : : goal = le32_to_cpu(es->s_first_data_block);
1553 : 7 : group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
1554 : 7 : EXT3_BLOCKS_PER_GROUP(sb);
1555 : : goal_group = group_no;
1556 : : retry_alloc:
1557 : 7 : gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
1558 [ + - ]: 7 : if (!gdp)
1559 : : goto io_error;
1560 : :
1561 : 7 : free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1562 : : /*
1563 : : * if there is not enough free blocks to make a new resevation
1564 : : * turn off reservation for this allocation
1565 : : */
1566 [ + + ][ - + ]: 7 : if (my_rsv && (free_blocks < windowsz)
1567 [ # # ]: 0 : && (free_blocks > 0)
1568 [ # # ]: 0 : && (rsv_is_empty(&my_rsv->rsv_window)))
1569 : : my_rsv = NULL;
1570 : :
1571 [ + - ]: 7 : if (free_blocks > 0) {
1572 : 14 : grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) %
1573 : 7 : EXT3_BLOCKS_PER_GROUP(sb));
1574 : 7 : bitmap_bh = read_block_bitmap(sb, group_no);
1575 [ + - ]: 7 : if (!bitmap_bh)
1576 : : goto io_error;
1577 : 7 : grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
1578 : : group_no, bitmap_bh, grp_target_blk,
1579 : : my_rsv, &num, &fatal);
1580 [ + - ]: 7 : if (fatal)
1581 : : goto out;
1582 [ - + ]: 7 : if (grp_alloc_blk >= 0)
1583 : : goto allocated;
1584 : : }
1585 : :
1586 : 0 : ngroups = EXT3_SB(sb)->s_groups_count;
1587 : 0 : smp_rmb();
1588 : :
1589 : : /*
1590 : : * Now search the rest of the groups. We assume that
1591 : : * group_no and gdp correctly point to the last group visited.
1592 : : */
1593 [ # # ]: 0 : for (bgi = 0; bgi < ngroups; bgi++) {
1594 : 0 : group_no++;
1595 [ # # ]: 0 : if (group_no >= ngroups)
1596 : : group_no = 0;
1597 : 0 : gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
1598 [ # # ]: 0 : if (!gdp)
1599 : : goto io_error;
1600 : 0 : free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1601 : : /*
1602 : : * skip this group (and avoid loading bitmap) if there
1603 : : * are no free blocks
1604 : : */
1605 [ # # ]: 0 : if (!free_blocks)
1606 : 0 : continue;
1607 : : /*
1608 : : * skip this group if the number of
1609 : : * free blocks is less than half of the reservation
1610 : : * window size.
1611 : : */
1612 [ # # ][ # # ]: 0 : if (my_rsv && (free_blocks <= (windowsz/2)))
1613 : 0 : continue;
1614 : :
1615 : : brelse(bitmap_bh);
1616 : 0 : bitmap_bh = read_block_bitmap(sb, group_no);
1617 [ # # ]: 0 : if (!bitmap_bh)
1618 : : goto io_error;
1619 : : /*
1620 : : * try to allocate block(s) from this group, without a goal(-1).
1621 : : */
1622 : 0 : grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
1623 : : group_no, bitmap_bh, -1, my_rsv,
1624 : : &num, &fatal);
1625 [ # # ]: 0 : if (fatal)
1626 : : goto out;
1627 [ # # ]: 0 : if (grp_alloc_blk >= 0)
1628 : : goto allocated;
1629 : : }
1630 : : /*
1631 : : * We may end up a bogus earlier ENOSPC error due to
1632 : : * filesystem is "full" of reservations, but
1633 : : * there maybe indeed free blocks available on disk
1634 : : * In this case, we just forget about the reservations
1635 : : * just do block allocation as without reservations.
1636 : : */
1637 [ # # ]: 0 : if (my_rsv) {
1638 : : my_rsv = NULL;
1639 : : windowsz = 0;
1640 : : group_no = goal_group;
1641 : : goto retry_alloc;
1642 : : }
1643 : : /* No space left on the device */
1644 : 0 : *errp = -ENOSPC;
1645 : 0 : goto out;
1646 : :
1647 : : allocated:
1648 : :
1649 : : ext3_debug("using block group %d(%d)\n",
1650 : : group_no, gdp->bg_free_blocks_count);
1651 : :
1652 : : BUFFER_TRACE(gdp_bh, "get_write_access");
1653 : 7 : fatal = ext3_journal_get_write_access(handle, gdp_bh);
1654 [ + - ]: 7 : if (fatal)
1655 : : goto out;
1656 : :
1657 : 14 : ret_block = grp_alloc_blk + ext3_group_first_block_no(sb, group_no);
1658 : :
1659 [ - + ][ # # ]: 7 : if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) ||
[ - + ]
1660 [ # # ][ + - ]: 7 : in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) ||
1661 [ + - ]: 7 : in_range(ret_block, le32_to_cpu(gdp->bg_inode_table),
1662 [ + - ]: 7 : EXT3_SB(sb)->s_itb_per_group) ||
1663 [ - + ]: 7 : in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table),
1664 : : EXT3_SB(sb)->s_itb_per_group)) {
1665 : 0 : ext3_error(sb, "ext3_new_block",
1666 : : "Allocating block in system zone - "
1667 : : "blocks from "E3FSBLK", length %lu",
1668 : : ret_block, num);
1669 : : /*
1670 : : * claim_block() marked the blocks we allocated as in use. So we
1671 : : * may want to selectively mark some of the blocks as free.
1672 : : */
1673 : 0 : goto retry_alloc;
1674 : : }
1675 : :
1676 : : performed_allocation = 1;
1677 : :
1678 : : #ifdef CONFIG_JBD_DEBUG
1679 : : {
1680 : : struct buffer_head *debug_bh;
1681 : :
1682 : : /* Record bitmap buffer state in the newly allocated block */
1683 : : debug_bh = sb_find_get_block(sb, ret_block);
1684 : : if (debug_bh) {
1685 : : BUFFER_TRACE(debug_bh, "state when allocated");
1686 : : BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
1687 : : brelse(debug_bh);
1688 : : }
1689 : : }
1690 : : jbd_lock_bh_state(bitmap_bh);
1691 : : spin_lock(sb_bgl_lock(sbi, group_no));
1692 : : if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
1693 : : int i;
1694 : :
1695 : : for (i = 0; i < num; i++) {
1696 : : if (ext3_test_bit(grp_alloc_blk+i,
1697 : : bh2jh(bitmap_bh)->b_committed_data)) {
1698 : : printk("%s: block was unexpectedly set in "
1699 : : "b_committed_data\n", __func__);
1700 : : }
1701 : : }
1702 : : }
1703 : : ext3_debug("found bit %d\n", grp_alloc_blk);
1704 : : spin_unlock(sb_bgl_lock(sbi, group_no));
1705 : : jbd_unlock_bh_state(bitmap_bh);
1706 : : #endif
1707 : :
1708 [ - + ]: 7 : if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) {
1709 : 0 : ext3_error(sb, "ext3_new_block",
1710 : : "block("E3FSBLK") >= blocks count(%d) - "
1711 : : "block_group = %d, es == %p ", ret_block,
1712 : : le32_to_cpu(es->s_blocks_count), group_no, es);
1713 : 0 : goto out;
1714 : : }
1715 : :
1716 : : /*
1717 : : * It is up to the caller to add the new buffer to a journal
1718 : : * list of some description. We don't know in advance whether
1719 : : * the caller wants to use it as metadata or data.
1720 : : */
1721 : : ext3_debug("allocating block %lu. Goal hits %d of %d.\n",
1722 : : ret_block, goal_hits, goal_attempts);
1723 : :
1724 : 7 : spin_lock(sb_bgl_lock(sbi, group_no));
1725 : 7 : le16_add_cpu(&gdp->bg_free_blocks_count, -num);
1726 : 7 : spin_unlock(sb_bgl_lock(sbi, group_no));
1727 : 7 : percpu_counter_sub(&sbi->s_freeblocks_counter, num);
1728 : :
1729 : : BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
1730 : 7 : err = ext3_journal_dirty_metadata(handle, gdp_bh);
1731 [ + - ]: 7 : if (!fatal)
1732 : 7 : fatal = err;
1733 : :
1734 [ + - ]: 7 : if (fatal)
1735 : : goto out;
1736 : :
1737 : 7 : *errp = 0;
1738 : : brelse(bitmap_bh);
1739 : :
1740 [ - + ]: 7 : if (num < *count) {
1741 : 0 : dquot_free_block(inode, *count-num);
1742 : 0 : *count = num;
1743 : : }
1744 : :
1745 : 7 : trace_ext3_allocate_blocks(inode, goal, num,
1746 : : (unsigned long long)ret_block);
1747 : :
1748 : 7 : return ret_block;
1749 : :
1750 : : io_error:
1751 : 0 : *errp = -EIO;
1752 : : out:
1753 [ # # ]: 0 : if (fatal) {
1754 : 0 : *errp = fatal;
1755 [ # # ]: 0 : ext3_std_error(sb, fatal);
1756 : : }
1757 : : /*
1758 : : * Undo the block allocation
1759 : : */
1760 [ # # ]: 0 : if (!performed_allocation)
1761 : 0 : dquot_free_block(inode, *count);
1762 : : brelse(bitmap_bh);
1763 : : return 0;
1764 : : }
1765 : :
1766 : 0 : ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
1767 : : ext3_fsblk_t goal, int *errp)
1768 : : {
1769 : 0 : unsigned long count = 1;
1770 : :
1771 : 0 : return ext3_new_blocks(handle, inode, goal, &count, errp);
1772 : : }
1773 : :
1774 : : /**
1775 : : * ext3_count_free_blocks() -- count filesystem free blocks
1776 : : * @sb: superblock
1777 : : *
1778 : : * Adds up the number of free blocks from each block group.
1779 : : */
1780 : 0 : ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
1781 : : {
1782 : : ext3_fsblk_t desc_count;
1783 : : struct ext3_group_desc *gdp;
1784 : : int i;
1785 : 8 : unsigned long ngroups = EXT3_SB(sb)->s_groups_count;
1786 : : #ifdef EXT3FS_DEBUG
1787 : : struct ext3_super_block *es;
1788 : : ext3_fsblk_t bitmap_count;
1789 : : unsigned long x;
1790 : : struct buffer_head *bitmap_bh = NULL;
1791 : :
1792 : : es = EXT3_SB(sb)->s_es;
1793 : : desc_count = 0;
1794 : : bitmap_count = 0;
1795 : : gdp = NULL;
1796 : :
1797 : : smp_rmb();
1798 : : for (i = 0; i < ngroups; i++) {
1799 : : gdp = ext3_get_group_desc(sb, i, NULL);
1800 : : if (!gdp)
1801 : : continue;
1802 : : desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
1803 : : brelse(bitmap_bh);
1804 : : bitmap_bh = read_block_bitmap(sb, i);
1805 : : if (bitmap_bh == NULL)
1806 : : continue;
1807 : :
1808 : : x = ext3_count_free(bitmap_bh, sb->s_blocksize);
1809 : : printk("group %d: stored = %d, counted = %lu\n",
1810 : : i, le16_to_cpu(gdp->bg_free_blocks_count), x);
1811 : : bitmap_count += x;
1812 : : }
1813 : : brelse(bitmap_bh);
1814 : : printk("ext3_count_free_blocks: stored = "E3FSBLK
1815 : : ", computed = "E3FSBLK", "E3FSBLK"\n",
1816 : : (ext3_fsblk_t)le32_to_cpu(es->s_free_blocks_count),
1817 : : desc_count, bitmap_count);
1818 : : return bitmap_count;
1819 : : #else
1820 : : desc_count = 0;
1821 : 8 : smp_rmb();
1822 [ + + ]: 20 : for (i = 0; i < ngroups; i++) {
1823 : 12 : gdp = ext3_get_group_desc(sb, i, NULL);
1824 [ - + ]: 20 : if (!gdp)
1825 : 0 : continue;
1826 : 12 : desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
1827 : : }
1828 : :
1829 : 8 : return desc_count;
1830 : : #endif
1831 : : }
1832 : :
1833 : : static inline int test_root(int a, int b)
1834 : : {
1835 : : int num = b;
1836 : :
1837 [ # # ][ # # ]: 0 : while (a > num)
[ # # ]
1838 : 0 : num *= b;
1839 : : return num == a;
1840 : : }
1841 : :
1842 : 0 : static int ext3_group_sparse(int group)
1843 : : {
1844 [ # # ]: 0 : if (group <= 1)
1845 : : return 1;
1846 [ # # ]: 0 : if (!(group & 1))
1847 : : return 0;
1848 [ # # ][ # # ]: 0 : return (test_root(group, 7) || test_root(group, 5) ||
[ # # ]
1849 : : test_root(group, 3));
1850 : : }
1851 : :
1852 : : /**
1853 : : * ext3_bg_has_super - number of blocks used by the superblock in group
1854 : : * @sb: superblock for filesystem
1855 : : * @group: group number to check
1856 : : *
1857 : : * Return the number of blocks used by the superblock (primary or backup)
1858 : : * in this group. Currently this will be only 0 or 1.
1859 : : */
1860 : 0 : int ext3_bg_has_super(struct super_block *sb, int group)
1861 : : {
1862 [ # # ]: 0 : if (EXT3_HAS_RO_COMPAT_FEATURE(sb,
1863 [ # # ]: 0 : EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
1864 : 0 : !ext3_group_sparse(group))
1865 : : return 0;
1866 : 0 : return 1;
1867 : : }
1868 : :
1869 : : static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group)
1870 : : {
1871 : : unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb);
1872 : 0 : unsigned long first = metagroup * EXT3_DESC_PER_BLOCK(sb);
1873 : 0 : unsigned long last = first + EXT3_DESC_PER_BLOCK(sb) - 1;
1874 : :
1875 [ # # ][ # # ]: 0 : if (group == first || group == first + 1 || group == last)
[ # # ]
1876 : : return 1;
1877 : : return 0;
1878 : : }
1879 : :
1880 : : static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group)
1881 : : {
1882 [ # # ]: 0 : return ext3_bg_has_super(sb, group) ? EXT3_SB(sb)->s_gdb_count : 0;
1883 : : }
1884 : :
1885 : : /**
1886 : : * ext3_bg_num_gdb - number of blocks used by the group table in group
1887 : : * @sb: superblock for filesystem
1888 : : * @group: group number to check
1889 : : *
1890 : : * Return the number of blocks used by the group descriptor table
1891 : : * (primary or backup) in this group. In the future there may be a
1892 : : * different number of descriptor blocks in each group.
1893 : : */
1894 : 0 : unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
1895 : : {
1896 : 0 : unsigned long first_meta_bg =
1897 : 0 : le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg);
1898 : 0 : unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb);
1899 : :
1900 [ # # ][ # # ]: 0 : if (!EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG) ||
1901 : : metagroup < first_meta_bg)
1902 : 0 : return ext3_bg_num_gdb_nometa(sb,group);
1903 : :
1904 : 0 : return ext3_bg_num_gdb_meta(sb,group);
1905 : :
1906 : : }
1907 : :
1908 : : /**
1909 : : * ext3_trim_all_free -- function to trim all free space in alloc. group
1910 : : * @sb: super block for file system
1911 : : * @group: allocation group to trim
1912 : : * @start: first group block to examine
1913 : : * @max: last group block to examine
1914 : : * @gdp: allocation group description structure
1915 : : * @minblocks: minimum extent block count
1916 : : *
1917 : : * ext3_trim_all_free walks through group's block bitmap searching for free
1918 : : * blocks. When the free block is found, it tries to allocate this block and
1919 : : * consequent free block to get the biggest free extent possible, until it
1920 : : * reaches any used block. Then issue a TRIM command on this extent and free
1921 : : * the extent in the block bitmap. This is done until whole group is scanned.
1922 : : */
1923 : 0 : static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
1924 : : unsigned int group,
1925 : : ext3_grpblk_t start, ext3_grpblk_t max,
1926 : : ext3_grpblk_t minblocks)
1927 : : {
1928 : : handle_t *handle;
1929 : : ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
1930 : : ext3_fsblk_t discard_block;
1931 : 0 : struct ext3_sb_info *sbi;
1932 : : struct buffer_head *gdp_bh, *bitmap_bh = NULL;
1933 : : struct ext3_group_desc *gdp;
1934 : : int err = 0, ret = 0;
1935 : :
1936 : : /*
1937 : : * We will update one block bitmap, and one group descriptor
1938 : : */
1939 : 0 : handle = ext3_journal_start_sb(sb, 2);
1940 [ # # ]: 0 : if (IS_ERR(handle))
1941 : 0 : return PTR_ERR(handle);
1942 : :
1943 : 0 : bitmap_bh = read_block_bitmap(sb, group);
1944 [ # # ]: 0 : if (!bitmap_bh) {
1945 : : err = -EIO;
1946 : : goto err_out;
1947 : : }
1948 : :
1949 : : BUFFER_TRACE(bitmap_bh, "getting undo access");
1950 : 0 : err = ext3_journal_get_undo_access(handle, bitmap_bh);
1951 [ # # ]: 0 : if (err)
1952 : : goto err_out;
1953 : :
1954 : 0 : gdp = ext3_get_group_desc(sb, group, &gdp_bh);
1955 [ # # ]: 0 : if (!gdp) {
1956 : : err = -EIO;
1957 : : goto err_out;
1958 : : }
1959 : :
1960 : : BUFFER_TRACE(gdp_bh, "get_write_access");
1961 : 0 : err = ext3_journal_get_write_access(handle, gdp_bh);
1962 [ # # ]: 0 : if (err)
1963 : : goto err_out;
1964 : :
1965 : 0 : free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1966 : : sbi = EXT3_SB(sb);
1967 : :
1968 : : /* Walk through the whole group */
1969 [ # # ]: 0 : while (start <= max) {
1970 : 0 : start = bitmap_search_next_usable_block(start, bitmap_bh, max);
1971 [ # # ]: 0 : if (start < 0)
1972 : : break;
1973 : : next = start;
1974 : :
1975 : : /*
1976 : : * Allocate contiguous free extents by setting bits in the
1977 : : * block bitmap
1978 : : */
1979 [ # # ]: 0 : while (next <= max
1980 [ # # ]: 0 : && claim_block(sb_bgl_lock(sbi, group),
1981 : : next, bitmap_bh)) {
1982 : 0 : next++;
1983 : : }
1984 : :
1985 : : /* We did not claim any blocks */
1986 [ # # ]: 0 : if (next == start)
1987 : 0 : continue;
1988 : :
1989 : 0 : discard_block = (ext3_fsblk_t)start +
1990 : : ext3_group_first_block_no(sb, group);
1991 : :
1992 : : /* Update counters */
1993 : 0 : spin_lock(sb_bgl_lock(sbi, group));
1994 : 0 : le16_add_cpu(&gdp->bg_free_blocks_count, start - next);
1995 : 0 : spin_unlock(sb_bgl_lock(sbi, group));
1996 : 0 : percpu_counter_sub(&sbi->s_freeblocks_counter, next - start);
1997 : :
1998 : 0 : free_blocks -= next - start;
1999 : : /* Do not issue a TRIM on extents smaller than minblocks */
2000 [ # # ]: 0 : if ((next - start) < minblocks)
2001 : : goto free_extent;
2002 : :
2003 : 0 : trace_ext3_discard_blocks(sb, discard_block, next - start);
2004 : : /* Send the TRIM command down to the device */
2005 : 0 : err = sb_issue_discard(sb, discard_block, next - start,
2006 : : GFP_NOFS, 0);
2007 : 0 : count += (next - start);
2008 : : free_extent:
2009 : : freed = 0;
2010 : :
2011 : : /*
2012 : : * Clear bits in the bitmap
2013 : : */
2014 [ # # ]: 0 : for (bit = start; bit < next; bit++) {
2015 : : BUFFER_TRACE(bitmap_bh, "clear bit");
2016 [ # # ]: 0 : if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, group),
2017 : : bit, bitmap_bh->b_data)) {
2018 : 0 : ext3_error(sb, __func__,
2019 : : "bit already cleared for block "E3FSBLK,
2020 : : (unsigned long)bit);
2021 : : BUFFER_TRACE(bitmap_bh, "bit already cleared");
2022 : : } else {
2023 : 0 : freed++;
2024 : : }
2025 : : }
2026 : :
2027 : : /* Update couters */
2028 : 0 : spin_lock(sb_bgl_lock(sbi, group));
2029 : 0 : le16_add_cpu(&gdp->bg_free_blocks_count, freed);
2030 : 0 : spin_unlock(sb_bgl_lock(sbi, group));
2031 : 0 : percpu_counter_add(&sbi->s_freeblocks_counter, freed);
2032 : :
2033 : : start = next;
2034 [ # # ]: 0 : if (err < 0) {
2035 [ # # ]: 0 : if (err != -EOPNOTSUPP)
2036 : 0 : ext3_warning(sb, __func__, "Discard command "
2037 : : "returned error %d\n", err);
2038 : : break;
2039 : : }
2040 : :
2041 [ # # ]: 0 : if (fatal_signal_pending(current)) {
2042 : : err = -ERESTARTSYS;
2043 : : break;
2044 : : }
2045 : :
2046 : 0 : cond_resched();
2047 : :
2048 : : /* No more suitable extents */
2049 [ # # ]: 0 : if (free_blocks < minblocks)
2050 : : break;
2051 : : }
2052 : :
2053 : : /* We dirtied the bitmap block */
2054 : : BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
2055 : 0 : ret = ext3_journal_dirty_metadata(handle, bitmap_bh);
2056 [ # # ]: 0 : if (!err)
2057 : : err = ret;
2058 : :
2059 : : /* And the group descriptor block */
2060 : : BUFFER_TRACE(gdp_bh, "dirtied group descriptor block");
2061 : 0 : ret = ext3_journal_dirty_metadata(handle, gdp_bh);
2062 [ # # ]: 0 : if (!err)
2063 : : err = ret;
2064 : :
2065 : : ext3_debug("trimmed %d blocks in the group %d\n",
2066 : : count, group);
2067 : :
2068 : : err_out:
2069 [ # # ]: 0 : if (err)
2070 : : count = err;
2071 : 0 : ext3_journal_stop(handle);
2072 : : brelse(bitmap_bh);
2073 : :
2074 : 0 : return count;
2075 : : }
2076 : :
2077 : : /**
2078 : : * ext3_trim_fs() -- trim ioctl handle function
2079 : : * @sb: superblock for filesystem
2080 : : * @start: First Byte to trim
2081 : : * @len: number of Bytes to trim from start
2082 : : * @minlen: minimum extent length in Bytes
2083 : : *
2084 : : * ext3_trim_fs goes through all allocation groups containing Bytes from
2085 : : * start to start+len. For each such a group ext3_trim_all_free function
2086 : : * is invoked to trim all free space.
2087 : : */
2088 : 0 : int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2089 : : {
2090 : : ext3_grpblk_t last_block, first_block;
2091 : : unsigned long group, first_group, last_group;
2092 : : struct ext3_group_desc *gdp;
2093 : 0 : struct ext3_super_block *es = EXT3_SB(sb)->s_es;
2094 : : uint64_t start, minlen, end, trimmed = 0;
2095 : 0 : ext3_fsblk_t first_data_blk =
2096 : : le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
2097 : 0 : ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
2098 : : int ret = 0;
2099 : :
2100 : 0 : start = range->start >> sb->s_blocksize_bits;
2101 : 0 : end = start + (range->len >> sb->s_blocksize_bits) - 1;
2102 : 0 : minlen = range->minlen >> sb->s_blocksize_bits;
2103 : :
2104 [ # # ][ # # ]: 0 : if (minlen > EXT3_BLOCKS_PER_GROUP(sb) ||
2105 [ # # ]: 0 : start >= max_blks ||
2106 : 0 : range->len < sb->s_blocksize)
2107 : : return -EINVAL;
2108 [ # # ]: 0 : if (end >= max_blks)
2109 : 0 : end = max_blks - 1;
2110 [ # # ]: 0 : if (end <= first_data_blk)
2111 : : goto out;
2112 [ # # ]: 0 : if (start < first_data_blk)
2113 : : start = first_data_blk;
2114 : :
2115 : 0 : smp_rmb();
2116 : :
2117 : : /* Determine first and last group to examine based on start and len */
2118 : 0 : ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start,
2119 : : &first_group, &first_block);
2120 : 0 : ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) end,
2121 : : &last_group, &last_block);
2122 : :
2123 : : /* end now represents the last block to discard in this group */
2124 : 0 : end = EXT3_BLOCKS_PER_GROUP(sb) - 1;
2125 : :
2126 [ # # ]: 0 : for (group = first_group; group <= last_group; group++) {
2127 : 0 : gdp = ext3_get_group_desc(sb, group, NULL);
2128 [ # # ]: 0 : if (!gdp)
2129 : : break;
2130 : :
2131 : : /*
2132 : : * For all the groups except the last one, last block will
2133 : : * always be EXT3_BLOCKS_PER_GROUP(sb)-1, so we only need to
2134 : : * change it for the last group, note that last_block is
2135 : : * already computed earlier by ext3_get_group_no_and_offset()
2136 : : */
2137 [ # # ]: 0 : if (group == last_group)
2138 : 0 : end = last_block;
2139 : :
2140 [ # # ]: 0 : if (le16_to_cpu(gdp->bg_free_blocks_count) >= minlen) {
2141 : 0 : ret = ext3_trim_all_free(sb, group, first_block,
2142 : : end, minlen);
2143 [ # # ]: 0 : if (ret < 0)
2144 : : break;
2145 : 0 : trimmed += ret;
2146 : : }
2147 : :
2148 : : /*
2149 : : * For every group except the first one, we are sure
2150 : : * that the first block to discard will be block #0.
2151 : : */
2152 : : first_block = 0;
2153 : : }
2154 : :
2155 [ # # ]: 0 : if (ret > 0)
2156 : : ret = 0;
2157 : :
2158 : : out:
2159 : 0 : range->len = trimmed * sb->s_blocksize;
2160 : 0 : return ret;
2161 : : }
|