LCOV - code coverage report
Current view: top level - fs/ext4 - mballoc.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1112 1743 63.8 %
Date: 2014-02-18 Functions: 56 76 73.7 %
Branches: 699 1493 46.8 %

           Branch data     Line data    Source code
       1                 :            : /*
       2                 :            :  * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
       3                 :            :  * Written by Alex Tomas <alex@clusterfs.com>
       4                 :            :  *
       5                 :            :  * This program is free software; you can redistribute it and/or modify
       6                 :            :  * it under the terms of the GNU General Public License version 2 as
       7                 :            :  * published by the Free Software Foundation.
       8                 :            :  *
       9                 :            :  * This program is distributed in the hope that it will be useful,
      10                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      11                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12                 :            :  * GNU General Public License for more details.
      13                 :            :  *
      14                 :            :  * You should have received a copy of the GNU General Public License
      15                 :            :  * along with this program; if not, write to the Free Software
      16                 :            :  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
      17                 :            :  */
      18                 :            : 
      19                 :            : 
      20                 :            : /*
      21                 :            :  * mballoc.c contains the multiblocks allocation routines
      22                 :            :  */
      23                 :            : 
      24                 :            : #include "ext4_jbd2.h"
      25                 :            : #include "mballoc.h"
      26                 :            : #include <linux/log2.h>
      27                 :            : #include <linux/module.h>
      28                 :            : #include <linux/slab.h>
      29                 :            : #include <trace/events/ext4.h>
      30                 :            : 
      31                 :            : #ifdef CONFIG_EXT4_DEBUG
      32                 :            : ushort ext4_mballoc_debug __read_mostly;
      33                 :            : 
      34                 :            : module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
      35                 :            : MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
      36                 :            : #endif
      37                 :            : 
      38                 :            : /*
      39                 :            :  * MUSTDO:
      40                 :            :  *   - test ext4_ext_search_left() and ext4_ext_search_right()
      41                 :            :  *   - search for metadata in few groups
      42                 :            :  *
      43                 :            :  * TODO v4:
      44                 :            :  *   - normalization should take into account whether file is still open
      45                 :            :  *   - discard preallocations if no free space left (policy?)
      46                 :            :  *   - don't normalize tails
      47                 :            :  *   - quota
      48                 :            :  *   - reservation for superuser
      49                 :            :  *
      50                 :            :  * TODO v3:
      51                 :            :  *   - bitmap read-ahead (proposed by Oleg Drokin aka green)
      52                 :            :  *   - track min/max extents in each group for better group selection
      53                 :            :  *   - mb_mark_used() may allocate chunk right after splitting buddy
      54                 :            :  *   - tree of groups sorted by number of free blocks
      55                 :            :  *   - error handling
      56                 :            :  */
      57                 :            : 
      58                 :            : /*
      59                 :            :  * An allocation request asks for multiple blocks
      60                 :            :  * near the goal (block) value specified.
      61                 :            :  *
      62                 :            :  * During initialization phase of the allocator we decide to use the
      63                 :            :  * group preallocation or inode preallocation depending on the size of
      64                 :            :  * the file. The size of the file could be the resulting file size we
      65                 :            :  * would have after allocation, or the current file size, which ever
      66                 :            :  * is larger. If the size is less than sbi->s_mb_stream_request we
      67                 :            :  * select to use the group preallocation. The default value of
      68                 :            :  * s_mb_stream_request is 16 blocks. This can also be tuned via
      69                 :            :  * /sys/fs/ext4/<partition>/mb_stream_req. The value is represented in
      70                 :            :  * terms of number of blocks.
      71                 :            :  *
      72                 :            :  * The main motivation for having small file use group preallocation is to
      73                 :            :  * ensure that we have small files closer together on the disk.
      74                 :            :  *
      75                 :            :  * First stage the allocator looks at the inode prealloc list,
      76                 :            :  * ext4_inode_info->i_prealloc_list, which contains list of prealloc
      77                 :            :  * spaces for this particular inode. The inode prealloc space is
      78                 :            :  * represented as:
      79                 :            :  *
      80                 :            :  * pa_lstart -> the logical start block for this prealloc space
      81                 :            :  * pa_pstart -> the physical start block for this prealloc space
      82                 :            :  * pa_len    -> length for this prealloc space (in clusters)
      83                 :            :  * pa_free   ->  free space available in this prealloc space (in clusters)
      84                 :            :  *
      85                 :            :  * The inode preallocation space is used looking at the _logical_ start
      86                 :            :  * block. Only if the logical file block falls within the range of the prealloc
      87                 :            :  * space will we consume the particular prealloc space. This makes sure that
      88                 :            :  * we have contiguous physical blocks representing the file blocks.
      89                 :            :  *
      90                 :            :  * The important thing to be noted in case of inode prealloc space is that
      91                 :            :  * we don't modify the values associated to inode prealloc space except
      92                 :            :  * pa_free.
      93                 :            :  *
      94                 :            :  * If we are not able to find blocks in the inode prealloc space and if we
      95                 :            :  * have the group allocation flag set then we look at the locality group
      96                 :            :  * prealloc space. These are per CPU prealloc list represented as
      97                 :            :  *
      98                 :            :  * ext4_sb_info.s_locality_groups[smp_processor_id()]
      99                 :            :  *
     100                 :            :  * The reason for having a per cpu locality group is to reduce the contention
     101                 :            :  * between CPUs. It is possible to get scheduled at this point.
     102                 :            :  *
     103                 :            :  * The locality group prealloc space is used looking at whether we have
     104                 :            :  * enough free space (pa_free) within the prealloc space.
     105                 :            :  *
     106                 :            :  * If we can't allocate blocks via inode prealloc or/and locality group
     107                 :            :  * prealloc then we look at the buddy cache. The buddy cache is represented
     108                 :            :  * by ext4_sb_info.s_buddy_cache (struct inode) whose file offset gets
     109                 :            :  * mapped to the buddy and bitmap information regarding different
     110                 :            :  * groups. The buddy information is attached to buddy cache inode so that
     111                 :            :  * we can access them through the page cache. The information regarding
     112                 :            :  * each group is loaded via ext4_mb_load_buddy.  The information involves
     113                 :            :  * block bitmap and buddy information. The information is stored in the
     114                 :            :  * inode as:
     115                 :            :  *
     116                 :            :  *  {                        page                        }
     117                 :            :  *  [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
     118                 :            :  *
     119                 :            :  *
     120                 :            :  * one block each for bitmap and buddy information.  So for each group we
     121                 :            :  * take up 2 blocks. A page can contain blocks_per_page (PAGE_CACHE_SIZE /
     122                 :            :  * blocksize) blocks.  So it can have information regarding groups_per_page
     123                 :            :  * which is blocks_per_page/2
     124                 :            :  *
     125                 :            :  * The buddy cache inode is not stored on disk. The inode is thrown
     126                 :            :  * away when the filesystem is unmounted.
     127                 :            :  *
     128                 :            :  * We look for count number of blocks in the buddy cache. If we were able
     129                 :            :  * to locate that many free blocks we return with additional information
     130                 :            :  * regarding rest of the contiguous physical block available
     131                 :            :  *
     132                 :            :  * Before allocating blocks via buddy cache we normalize the request
     133                 :            :  * blocks. This ensures we ask for more blocks than we needed. The extra
     134                 :            :  * blocks that we get after allocation are added to the respective prealloc
     135                 :            :  * list. In case of inode preallocation we follow a list of heuristics
     136                 :            :  * based on file size. This can be found in ext4_mb_normalize_request. If
     137                 :            :  * we are doing a group prealloc we try to normalize the request to
     138                 :            :  * sbi->s_mb_group_prealloc.  The default value of s_mb_group_prealloc is
     139                 :            :  * dependent on the cluster size; for non-bigalloc file systems, it is
     140                 :            :  * 512 blocks. This can be tuned via
     141                 :            :  * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
     142                 :            :  * terms of number of blocks. If we have mounted the file system with -O
     143                 :            :  * stripe=<value> option the group prealloc request is normalized to the
     144                 :            :  * smallest multiple of the stripe value (sbi->s_stripe) which is
     145                 :            :  * greater than the default mb_group_prealloc.
     146                 :            :  *
     147                 :            :  * The regular allocator (using the buddy cache) supports a few tunables.
     148                 :            :  *
     149                 :            :  * /sys/fs/ext4/<partition>/mb_min_to_scan
     150                 :            :  * /sys/fs/ext4/<partition>/mb_max_to_scan
     151                 :            :  * /sys/fs/ext4/<partition>/mb_order2_req
     152                 :            :  *
     153                 :            :  * The regular allocator uses buddy scan only if the request len is a power of
     154                 :            :  * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
     155                 :            :  * value of s_mb_order2_reqs can be tuned via
     156                 :            :  * /sys/fs/ext4/<partition>/mb_order2_req.  If the request len is equal to
     157                 :            :  * stripe size (sbi->s_stripe), we try to search for contiguous block in
     158                 :            :  * stripe size. This should result in better allocation on RAID setups. If
     159                 :            :  * not, we search in the specific group using bitmap for best extents. The
     160                 :            :  * tunable min_to_scan and max_to_scan control the behaviour here.
     161                 :            :  * min_to_scan indicates how long the mballoc __must__ look for a best
     162                 :            :  * extent and max_to_scan indicates how long the mballoc __can__ look for a
     163                 :            :  * best extent in the found extents. Searching for the blocks starts with
     164                 :            :  * the group specified as the goal value in allocation context via
     165                 :            :  * ac_g_ex. Each group is first checked based on the criteria whether it
     166                 :            :  * can be used for allocation. ext4_mb_good_group explains how the groups are
     167                 :            :  * checked.
     168                 :            :  *
     169                 :            :  * Both prealloc spaces are populated as above. So for the first
     170                 :            :  * request we will hit the buddy cache which will result in this prealloc
     171                 :            :  * space getting filled. The prealloc space is then later used for the
     172                 :            :  * subsequent request.
     173                 :            :  */
     174                 :            : 
     175                 :            : /*
     176                 :            :  * mballoc operates on the following data:
     177                 :            :  *  - on-disk bitmap
     178                 :            :  *  - in-core buddy (actually includes buddy and bitmap)
     179                 :            :  *  - preallocation descriptors (PAs)
     180                 :            :  *
     181                 :            :  * there are two types of preallocations:
     182                 :            :  *  - inode
     183                 :            :  *    assigned to specific inode and can be used for this inode only.
     184                 :            :  *    it describes part of inode's space preallocated to specific
     185                 :            :  *    physical blocks. any block from that preallocation can be used
     186                 :            :  *    independently. the descriptor just tracks number of blocks left
     187                 :            :  *    unused. so, before taking some block from descriptor, one must
     188                 :            :  *    make sure corresponding logical block isn't allocated yet. this
     189                 :            :  *    also means that freeing any block within descriptor's range
     190                 :            :  *    must discard all preallocated blocks.
     191                 :            :  *  - locality group
     192                 :            :  *    assigned to specific locality group which does not translate to
     193                 :            :  *    permanent set of inodes: inode can join and leave group. space
     194                 :            :  *    from this type of preallocation can be used for any inode. thus
     195                 :            :  *    it's consumed from the beginning to the end.
     196                 :            :  *
     197                 :            :  * relation between them can be expressed as:
     198                 :            :  *    in-core buddy = on-disk bitmap + preallocation descriptors
     199                 :            :  *
     200                 :            :  * this means blocks mballoc considers used are:
     201                 :            :  *  - allocated blocks (persistent)
     202                 :            :  *  - preallocated blocks (non-persistent)
     203                 :            :  *
     204                 :            :  * consistency in mballoc world means that at any time a block is either
     205                 :            :  * free or used in ALL structures. notice: "any time" should not be read
     206                 :            :  * literally -- time is discrete and delimited by locks.
     207                 :            :  *
     208                 :            :  *  to keep it simple, we don't use block numbers, instead we count number of
     209                 :            :  *  blocks: how many blocks marked used/free in on-disk bitmap, buddy and PA.
     210                 :            :  *
     211                 :            :  * all operations can be expressed as:
     212                 :            :  *  - init buddy:                       buddy = on-disk + PAs
     213                 :            :  *  - new PA:                           buddy += N; PA = N
     214                 :            :  *  - use inode PA:                     on-disk += N; PA -= N
     215                 :            :  *  - discard inode PA                  buddy -= on-disk - PA; PA = 0
     216                 :            :  *  - use locality group PA             on-disk += N; PA -= N
     217                 :            :  *  - discard locality group PA         buddy -= PA; PA = 0
     218                 :            :  *  note: 'buddy -= on-disk - PA' is used to show that on-disk bitmap
     219                 :            :  *        is used in real operation because we can't know actual used
     220                 :            :  *        bits from PA, only from on-disk bitmap
     221                 :            :  *
     222                 :            :  * if we follow this strict logic, then all operations above should be atomic.
     223                 :            :  * given some of them can block, we'd have to use something like semaphores
     224                 :            :  * killing performance on high-end SMP hardware. let's try to relax it using
     225                 :            :  * the following knowledge:
     226                 :            :  *  1) if buddy is referenced, it's already initialized
     227                 :            :  *  2) while block is used in buddy and the buddy is referenced,
     228                 :            :  *     nobody can re-allocate that block
     229                 :            :  *  3) we work on bitmaps and '+' actually means 'set bits'. if on-disk has
     230                 :            :  *     bit set and PA claims same block, it's OK. IOW, one can set bit in
     231                 :            :  *     on-disk bitmap if buddy has same bit set or/and PA covers corresponded
     232                 :            :  *     block
     233                 :            :  *
     234                 :            :  * so, now we're building a concurrency table:
     235                 :            :  *  - init buddy vs.
     236                 :            :  *    - new PA
     237                 :            :  *      blocks for PA are allocated in the buddy, buddy must be referenced
     238                 :            :  *      until PA is linked to allocation group to avoid concurrent buddy init
     239                 :            :  *    - use inode PA
     240                 :            :  *      we need to make sure that either on-disk bitmap or PA has uptodate data
     241                 :            :  *      given (3) we care that PA-=N operation doesn't interfere with init
     242                 :            :  *    - discard inode PA
     243                 :            :  *      the simplest way would be to have buddy initialized by the discard
     244                 :            :  *    - use locality group PA
     245                 :            :  *      again PA-=N must be serialized with init
     246                 :            :  *    - discard locality group PA
     247                 :            :  *      the simplest way would be to have buddy initialized by the discard
     248                 :            :  *  - new PA vs.
     249                 :            :  *    - use inode PA
     250                 :            :  *      i_data_sem serializes them
     251                 :            :  *    - discard inode PA
     252                 :            :  *      discard process must wait until PA isn't used by another process
     253                 :            :  *    - use locality group PA
     254                 :            :  *      some mutex should serialize them
     255                 :            :  *    - discard locality group PA
     256                 :            :  *      discard process must wait until PA isn't used by another process
     257                 :            :  *  - use inode PA
     258                 :            :  *    - use inode PA
     259                 :            :  *      i_data_sem or another mutex should serialize them
     260                 :            :  *    - discard inode PA
     261                 :            :  *      discard process must wait until PA isn't used by another process
     262                 :            :  *    - use locality group PA
     263                 :            :  *      nothing wrong here -- they're different PAs covering different blocks
     264                 :            :  *    - discard locality group PA
     265                 :            :  *      discard process must wait until PA isn't used by another process
     266                 :            :  *
     267                 :            :  * now we're ready to draw a few conclusions:
     268                 :            :  *  - PA is referenced and while it is no discard is possible
     269                 :            :  *  - PA is referenced until block isn't marked in on-disk bitmap
     270                 :            :  *  - PA changes only after on-disk bitmap
     271                 :            :  *  - discard must not compete with init. either init is done before
     272                 :            :  *    any discard or they're serialized somehow
     273                 :            :  *  - buddy init as sum of on-disk bitmap and PAs is done atomically
     274                 :            :  *
     275                 :            :  * a special case when we've used PA to emptiness. no need to modify buddy
     276                 :            :  * in this case, but we should care about concurrent init
     277                 :            :  *
     278                 :            :  */
     279                 :            : 
     280                 :            :  /*
     281                 :            :  * Logic in few words:
     282                 :            :  *
     283                 :            :  *  - allocation:
     284                 :            :  *    load group
     285                 :            :  *    find blocks
     286                 :            :  *    mark bits in on-disk bitmap
     287                 :            :  *    release group
     288                 :            :  *
     289                 :            :  *  - use preallocation:
     290                 :            :  *    find proper PA (per-inode or group)
     291                 :            :  *    load group
     292                 :            :  *    mark bits in on-disk bitmap
     293                 :            :  *    release group
     294                 :            :  *    release PA
     295                 :            :  *
     296                 :            :  *  - free:
     297                 :            :  *    load group
     298                 :            :  *    mark bits in on-disk bitmap
     299                 :            :  *    release group
     300                 :            :  *
     301                 :            :  *  - discard preallocations in group:
     302                 :            :  *    mark PAs deleted
     303                 :            :  *    move them onto local list
     304                 :            :  *    load on-disk bitmap
     305                 :            :  *    load group
     306                 :            :  *    remove PA from object (inode or locality group)
     307                 :            :  *    mark free blocks in-core
     308                 :            :  *
     309                 :            :  *  - discard inode's preallocations:
     310                 :            :  */
     311                 :            : 
     312                 :            : /*
     313                 :            :  * Locking rules
     314                 :            :  *
     315                 :            :  * Locks:
     316                 :            :  *  - bitlock on a group        (group)
     317                 :            :  *  - object (inode/locality)   (object)
     318                 :            :  *  - per-pa lock               (pa)
     319                 :            :  *
     320                 :            :  * Paths:
     321                 :            :  *  - new pa
     322                 :            :  *    object
     323                 :            :  *    group
     324                 :            :  *
     325                 :            :  *  - find and use pa:
     326                 :            :  *    pa
     327                 :            :  *
     328                 :            :  *  - release consumed pa:
     329                 :            :  *    pa
     330                 :            :  *    group
     331                 :            :  *    object
     332                 :            :  *
     333                 :            :  *  - generate in-core bitmap:
     334                 :            :  *    group
     335                 :            :  *        pa
     336                 :            :  *
     337                 :            :  *  - discard all for given object (inode, locality group):
     338                 :            :  *    object
     339                 :            :  *        pa
     340                 :            :  *    group
     341                 :            :  *
     342                 :            :  *  - discard all for given group:
     343                 :            :  *    group
     344                 :            :  *        pa
     345                 :            :  *    group
     346                 :            :  *        object
     347                 :            :  *
     348                 :            :  */
     349                 :            : static struct kmem_cache *ext4_pspace_cachep;
     350                 :            : static struct kmem_cache *ext4_ac_cachep;
     351                 :            : static struct kmem_cache *ext4_free_data_cachep;
     352                 :            : 
     353                 :            : /* We create slab caches for groupinfo data structures based on the
     354                 :            :  * superblock block size.  There will be one per mounted filesystem for
     355                 :            :  * each unique s_blocksize_bits */
     356                 :            : #define NR_GRPINFO_CACHES 8
     357                 :            : static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
     358                 :            : 
     359                 :            : static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
     360                 :            :         "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
     361                 :            :         "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
     362                 :            :         "ext4_groupinfo_64k", "ext4_groupinfo_128k"
     363                 :            : };
     364                 :            : 
     365                 :            : static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
     366                 :            :                                         ext4_group_t group);
     367                 :            : static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
     368                 :            :                                                 ext4_group_t group);
     369                 :            : static void ext4_free_data_callback(struct super_block *sb,
     370                 :            :                                 struct ext4_journal_cb_entry *jce, int rc);
     371                 :            : 
     372                 :            : static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
     373                 :            : {
     374                 :            : #if BITS_PER_LONG == 64
     375                 :            :         *bit += ((unsigned long) addr & 7UL) << 3;
     376                 :            :         addr = (void *) ((unsigned long) addr & ~7UL);
     377                 :            : #elif BITS_PER_LONG == 32
     378                 :   15778789 :         *bit += ((unsigned long) addr & 3UL) << 3;
     379                 :   15461893 :         addr = (void *) ((unsigned long) addr & ~3UL);
     380                 :            : #else
     381                 :            : #error "how many bits you are?!"
     382                 :            : #endif
     383                 :            :         return addr;
     384                 :            : }
     385                 :            : 
     386                 :            : static inline int mb_test_bit(int bit, void *addr)
     387                 :            : {
     388                 :            :         /*
     389                 :            :          * ext4_test_bit on architecture like powerpc
     390                 :            :          * needs unsigned long aligned address
     391                 :            :          */
     392                 :            :         addr = mb_correct_addr_and_bit(&bit, addr);
     393                 :            :         return ext4_test_bit(bit, addr);
     394                 :            : }
     395                 :            : 
     396                 :            : static inline void mb_set_bit(int bit, void *addr)
     397                 :            : {
     398                 :            :         addr = mb_correct_addr_and_bit(&bit, addr);
     399                 :            :         ext4_set_bit(bit, addr);
     400                 :            : }
     401                 :            : 
     402                 :            : static inline void mb_clear_bit(int bit, void *addr)
     403                 :            : {
     404                 :            :         addr = mb_correct_addr_and_bit(&bit, addr);
     405                 :            :         ext4_clear_bit(bit, addr);
     406                 :            : }
     407                 :            : 
     408                 :            : static inline int mb_test_and_clear_bit(int bit, void *addr)
     409                 :            : {
     410                 :            :         addr = mb_correct_addr_and_bit(&bit, addr);
     411                 :            :         return ext4_test_and_clear_bit(bit, addr);
     412                 :            : }
     413                 :            : 
     414                 :            : static inline int mb_find_next_zero_bit(void *addr, int max, int start)
     415                 :            : {
     416                 :            :         int fix = 0, ret, tmpmax;
     417                 :            :         addr = mb_correct_addr_and_bit(&fix, addr);
     418                 :     639476 :         tmpmax = max + fix;
     419                 :     787705 :         start += fix;
     420                 :            : 
     421                 :     793225 :         ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
     422    [ + ][ #  # ]:     793342 :         if (ret > max)
         [ +  - ][ +  - ]
         [ #  # ][ +  - ]
                 [ +  - ]
     423                 :            :                 return max;
     424                 :            :         return ret;
     425                 :            : }
     426                 :            : 
     427                 :            : static inline int mb_find_next_bit(void *addr, int max, int start)
     428                 :            : {
     429                 :            :         int fix = 0, ret, tmpmax;
     430                 :            :         addr = mb_correct_addr_and_bit(&fix, addr);
     431                 :      82920 :         tmpmax = max + fix;
     432                 :      83283 :         start += fix;
     433                 :            : 
     434                 :      83282 :         ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
     435    [ + ][ #  # ]:      83282 :         if (ret > max)
                 [ +  - ]
     436                 :            :                 return max;
     437                 :            :         return ret;
     438                 :            : }
     439                 :            : 
     440                 :          0 : static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
     441                 :            : {
     442                 :            :         char *bb;
     443                 :            : 
     444         [ -  + ]:    2363891 :         BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
     445         [ -  + ]:    2363891 :         BUG_ON(max == NULL);
     446                 :            : 
     447         [ +  + ]:    2363891 :         if (order > e4b->bd_blkbits + 1) {
     448                 :        102 :                 *max = 0;
     449                 :        102 :                 return NULL;
     450                 :            :         }
     451                 :            : 
     452                 :            :         /* at order 0 we see each particular block */
     453         [ +  + ]:    2363789 :         if (order == 0) {
     454                 :    1272292 :                 *max = 1 << (e4b->bd_blkbits + 3);
     455                 :    1272292 :                 return e4b->bd_bitmap;
     456                 :            :         }
     457                 :            : 
     458                 :    1091497 :         bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
     459                 :    1091497 :         *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
     460                 :            : 
     461                 :    1091497 :         return bb;
     462                 :            : }
     463                 :            : 
     464                 :            : #ifdef DOUBLE_CHECK
     465                 :            : static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
     466                 :            :                            int first, int count)
     467                 :            : {
     468                 :            :         int i;
     469                 :            :         struct super_block *sb = e4b->bd_sb;
     470                 :            : 
     471                 :            :         if (unlikely(e4b->bd_info->bb_bitmap == NULL))
     472                 :            :                 return;
     473                 :            :         assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
     474                 :            :         for (i = 0; i < count; i++) {
     475                 :            :                 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
     476                 :            :                         ext4_fsblk_t blocknr;
     477                 :            : 
     478                 :            :                         blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
     479                 :            :                         blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
     480                 :            :                         ext4_grp_locked_error(sb, e4b->bd_group,
     481                 :            :                                               inode ? inode->i_ino : 0,
     482                 :            :                                               blocknr,
     483                 :            :                                               "freeing block already freed "
     484                 :            :                                               "(bit %u)",
     485                 :            :                                               first + i);
     486                 :            :                 }
     487                 :            :                 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
     488                 :            :         }
     489                 :            : }
     490                 :            : 
     491                 :            : static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
     492                 :            : {
     493                 :            :         int i;
     494                 :            : 
     495                 :            :         if (unlikely(e4b->bd_info->bb_bitmap == NULL))
     496                 :            :                 return;
     497                 :            :         assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
     498                 :            :         for (i = 0; i < count; i++) {
     499                 :            :                 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
     500                 :            :                 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
     501                 :            :         }
     502                 :            : }
     503                 :            : 
     504                 :            : static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
     505                 :            : {
     506                 :            :         if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
     507                 :            :                 unsigned char *b1, *b2;
     508                 :            :                 int i;
     509                 :            :                 b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
     510                 :            :                 b2 = (unsigned char *) bitmap;
     511                 :            :                 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
     512                 :            :                         if (b1[i] != b2[i]) {
     513                 :            :                                 ext4_msg(e4b->bd_sb, KERN_ERR,
     514                 :            :                                          "corruption in group %u "
     515                 :            :                                          "at byte %u(%u): %x in copy != %x "
     516                 :            :                                          "on disk/prealloc",
     517                 :            :                                          e4b->bd_group, i, i * 8, b1[i], b2[i]);
     518                 :            :                                 BUG();
     519                 :            :                         }
     520                 :            :                 }
     521                 :            :         }
     522                 :            : }
     523                 :            : 
     524                 :            : #else
     525                 :            : static inline void mb_free_blocks_double(struct inode *inode,
     526                 :            :                                 struct ext4_buddy *e4b, int first, int count)
     527                 :            : {
     528                 :            :         return;
     529                 :            : }
     530                 :            : static inline void mb_mark_used_double(struct ext4_buddy *e4b,
     531                 :            :                                                 int first, int count)
     532                 :            : {
     533                 :            :         return;
     534                 :            : }
     535                 :            : static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
     536                 :            : {
     537                 :            :         return;
     538                 :            : }
     539                 :            : #endif
     540                 :            : 
     541                 :            : #ifdef AGGRESSIVE_CHECK
     542                 :            : 
     543                 :            : #define MB_CHECK_ASSERT(assert)                                         \
     544                 :            : do {                                                                    \
     545                 :            :         if (!(assert)) {                                                \
     546                 :            :                 printk(KERN_EMERG                                       \
     547                 :            :                         "Assertion failure in %s() at %s:%d: \"%s\"\n",     \
     548                 :            :                         function, file, line, # assert);                \
     549                 :            :                 BUG();                                                  \
     550                 :            :         }                                                               \
     551                 :            : } while (0)
     552                 :            : 
     553                 :            : static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
     554                 :            :                                 const char *function, int line)
     555                 :            : {
     556                 :            :         struct super_block *sb = e4b->bd_sb;
     557                 :            :         int order = e4b->bd_blkbits + 1;
     558                 :            :         int max;
     559                 :            :         int max2;
     560                 :            :         int i;
     561                 :            :         int j;
     562                 :            :         int k;
     563                 :            :         int count;
     564                 :            :         struct ext4_group_info *grp;
     565                 :            :         int fragments = 0;
     566                 :            :         int fstart;
     567                 :            :         struct list_head *cur;
     568                 :            :         void *buddy;
     569                 :            :         void *buddy2;
     570                 :            : 
     571                 :            :         {
     572                 :            :                 static int mb_check_counter;
     573                 :            :                 if (mb_check_counter++ % 100 != 0)
     574                 :            :                         return 0;
     575                 :            :         }
     576                 :            : 
     577                 :            :         while (order > 1) {
     578                 :            :                 buddy = mb_find_buddy(e4b, order, &max);
     579                 :            :                 MB_CHECK_ASSERT(buddy);
     580                 :            :                 buddy2 = mb_find_buddy(e4b, order - 1, &max2);
     581                 :            :                 MB_CHECK_ASSERT(buddy2);
     582                 :            :                 MB_CHECK_ASSERT(buddy != buddy2);
     583                 :            :                 MB_CHECK_ASSERT(max * 2 == max2);
     584                 :            : 
     585                 :            :                 count = 0;
     586                 :            :                 for (i = 0; i < max; i++) {
     587                 :            : 
     588                 :            :                         if (mb_test_bit(i, buddy)) {
     589                 :            :                                 /* only single bit in buddy2 may be 1 */
     590                 :            :                                 if (!mb_test_bit(i << 1, buddy2)) {
     591                 :            :                                         MB_CHECK_ASSERT(
     592                 :            :                                                 mb_test_bit((i<<1)+1, buddy2));
     593                 :            :                                 } else if (!mb_test_bit((i << 1) + 1, buddy2)) {
     594                 :            :                                         MB_CHECK_ASSERT(
     595                 :            :                                                 mb_test_bit(i << 1, buddy2));
     596                 :            :                                 }
     597                 :            :                                 continue;
     598                 :            :                         }
     599                 :            : 
     600                 :            :                         /* both bits in buddy2 must be 1 */
     601                 :            :                         MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
     602                 :            :                         MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
     603                 :            : 
     604                 :            :                         for (j = 0; j < (1 << order); j++) {
     605                 :            :                                 k = (i * (1 << order)) + j;
     606                 :            :                                 MB_CHECK_ASSERT(
     607                 :            :                                         !mb_test_bit(k, e4b->bd_bitmap));
     608                 :            :                         }
     609                 :            :                         count++;
     610                 :            :                 }
     611                 :            :                 MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
     612                 :            :                 order--;
     613                 :            :         }
     614                 :            : 
     615                 :            :         fstart = -1;
     616                 :            :         buddy = mb_find_buddy(e4b, 0, &max);
     617                 :            :         for (i = 0; i < max; i++) {
     618                 :            :                 if (!mb_test_bit(i, buddy)) {
     619                 :            :                         MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
     620                 :            :                         if (fstart == -1) {
     621                 :            :                                 fragments++;
     622                 :            :                                 fstart = i;
     623                 :            :                         }
     624                 :            :                         continue;
     625                 :            :                 }
     626                 :            :                 fstart = -1;
     627                 :            :                 /* check used bits only */
     628                 :            :                 for (j = 0; j < e4b->bd_blkbits + 1; j++) {
     629                 :            :                         buddy2 = mb_find_buddy(e4b, j, &max2);
     630                 :            :                         k = i >> j;
     631                 :            :                         MB_CHECK_ASSERT(k < max2);
     632                 :            :                         MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
     633                 :            :                 }
     634                 :            :         }
     635                 :            :         MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
     636                 :            :         MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
     637                 :            : 
     638                 :            :         grp = ext4_get_group_info(sb, e4b->bd_group);
     639                 :            :         list_for_each(cur, &grp->bb_prealloc_list) {
     640                 :            :                 ext4_group_t groupnr;
     641                 :            :                 struct ext4_prealloc_space *pa;
     642                 :            :                 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
     643                 :            :                 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
     644                 :            :                 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
     645                 :            :                 for (i = 0; i < pa->pa_len; i++)
     646                 :            :                         MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
     647                 :            :         }
     648                 :            :         return 0;
     649                 :            : }
     650                 :            : #undef MB_CHECK_ASSERT
     651                 :            : #define mb_check_buddy(e4b) __mb_check_buddy(e4b,       \
     652                 :            :                                         __FILE__, __func__, __LINE__)
     653                 :            : #else
     654                 :            : #define mb_check_buddy(e4b)
     655                 :            : #endif
     656                 :            : 
     657                 :            : /*
     658                 :            :  * Divide blocks started from @first with length @len into
     659                 :            :  * smaller chunks with power of 2 blocks.
     660                 :            :  * Clear the bits in bitmap which the blocks of the chunk(s) covered,
     661                 :            :  * then increase bb_counters[] for corresponded chunk size.
     662                 :            :  */
     663                 :          0 : static void ext4_mb_mark_free_simple(struct super_block *sb,
     664                 :            :                                 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
     665                 :            :                                         struct ext4_group_info *grp)
     666                 :            : {
     667                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
     668                 :            :         ext4_grpblk_t min;
     669                 :            :         ext4_grpblk_t max;
     670                 :            :         ext4_grpblk_t chunk;
     671                 :            :         unsigned short border;
     672                 :            : 
     673         [ -  + ]:        311 :         BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
     674                 :            : 
     675                 :        311 :         border = 2 << sb->s_blocksize_bits;
     676                 :            : 
     677         [ +  + ]:       1578 :         while (len > 0) {
     678                 :            :                 /* find how many blocks can be covered since this position */
     679                 :       2534 :                 max = ffs(first | border) - 1;
     680                 :            : 
     681                 :            :                 /* find how many blocks of power 2 we need to mark */
     682                 :       1267 :                 min = fls(len) - 1;
     683                 :            : 
     684         [ +  + ]:       1267 :                 if (max < min)
     685                 :            :                         min = max;
     686                 :       1267 :                 chunk = 1 << min;
     687                 :            : 
     688                 :            :                 /* mark multiblock chunks only */
     689                 :       1267 :                 grp->bb_counters[min]++;
     690         [ +  + ]:       1267 :                 if (min > 0)
     691                 :       2266 :                         mb_clear_bit(first >> min,
     692                 :       1133 :                                      buddy + sbi->s_mb_offsets[min]);
     693                 :            : 
     694                 :       1267 :                 len -= chunk;
     695                 :       1267 :                 first += chunk;
     696                 :            :         }
     697                 :        311 : }
     698                 :            : 
     699                 :            : /*
     700                 :            :  * Cache the order of the largest free extent we have available in this block
     701                 :            :  * group.
     702                 :            :  */
     703                 :            : static void
     704                 :            : mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
     705                 :            : {
     706                 :            :         int i;
     707                 :            :         int bits;
     708                 :            : 
     709                 :     242387 :         grp->bb_largest_free_order = -1; /* uninit */
     710                 :            : 
     711                 :     242387 :         bits = sb->s_blocksize_bits + 1;
     712    [ +  + ][ + ]:     301170 :         for (i = bits; i >= 0; i--) {
                 [ +  + ]
     713 [ +  + ][ +  + ]:     301139 :                 if (grp->bb_counters[i] > 0) {
                 [ +  + ]
     714                 :     242356 :                         grp->bb_largest_free_order = i;
     715                 :            :                         break;
     716                 :            :                 }
     717                 :            :         }
     718                 :            : }
     719                 :            : 
     720                 :            : static noinline_for_stack
     721                 :          0 : void ext4_mb_generate_buddy(struct super_block *sb,
     722                 :            :                                 void *buddy, void *bitmap, ext4_group_t group)
     723                 :            : {
     724                 :            :         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
     725                 :        131 :         ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
     726                 :            :         ext4_grpblk_t i = 0;
     727                 :            :         ext4_grpblk_t first;
     728                 :            :         ext4_grpblk_t len;
     729                 :            :         unsigned free = 0;
     730                 :            :         unsigned fragments = 0;
     731         [ +  - ]:        131 :         unsigned long long period = get_cycles();
     732                 :            : 
     733                 :            :         /* initialize buddy from bitmap which is aggregation
     734                 :            :          * of on-disk bitmap and preallocations */
     735                 :            :         i = mb_find_next_zero_bit(bitmap, max, 0);
     736                 :        131 :         grp->bb_first_free = i;
     737         [ +  + ]:        494 :         while (i < max) {
     738                 :        363 :                 fragments++;
     739                 :            :                 first = i;
     740                 :            :                 i = mb_find_next_bit(bitmap, max, i);
     741                 :        363 :                 len = i - first;
     742                 :        363 :                 free += len;
     743         [ +  + ]:        363 :                 if (len > 1)
     744                 :        311 :                         ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
     745                 :            :                 else
     746                 :         52 :                         grp->bb_counters[0]++;
     747         [ +  + ]:        363 :                 if (i < max)
     748                 :            :                         i = mb_find_next_zero_bit(bitmap, max, i);
     749                 :            :         }
     750                 :        131 :         grp->bb_fragments = fragments;
     751                 :            : 
     752         [ -  + ]:        131 :         if (free != grp->bb_free) {
     753                 :          0 :                 ext4_grp_locked_error(sb, group, 0, 0,
     754                 :            :                                       "%u clusters in bitmap, %u in gd; "
     755                 :            :                                       "block bitmap corrupt.",
     756                 :            :                                       free, grp->bb_free);
     757                 :            :                 /*
     758                 :            :                  * If we intend to continue, we consider group descriptor
     759                 :            :                  * corrupt and update bb_free using bitmap value
     760                 :            :                  */
     761                 :          0 :                 grp->bb_free = free;
     762                 :          0 :                 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
     763                 :            :         }
     764                 :            :         mb_set_largest_free_order(sb, grp);
     765                 :            : 
     766                 :        131 :         clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
     767                 :            : 
     768         [ +  - ]:        131 :         period = get_cycles() - period;
     769                 :            :         spin_lock(&EXT4_SB(sb)->s_bal_lock);
     770                 :        131 :         EXT4_SB(sb)->s_mb_buddies_generated++;
     771                 :        131 :         EXT4_SB(sb)->s_mb_generation_time += period;
     772                 :            :         spin_unlock(&EXT4_SB(sb)->s_bal_lock);
     773                 :        131 : }
     774                 :            : 
     775                 :          0 : static void mb_regenerate_buddy(struct ext4_buddy *e4b)
     776                 :            : {
     777                 :            :         int count;
     778                 :            :         int order = 1;
     779                 :            :         void *buddy;
     780                 :            : 
     781         [ #  # ]:          0 :         while ((buddy = mb_find_buddy(e4b, order++, &count))) {
     782                 :          0 :                 ext4_set_bits(buddy, 0, count);
     783                 :            :         }
     784                 :          0 :         e4b->bd_info->bb_fragments = 0;
     785         [ #  # ]:          0 :         memset(e4b->bd_info->bb_counters, 0,
     786                 :            :                 sizeof(*e4b->bd_info->bb_counters) *
     787                 :            :                 (e4b->bd_sb->s_blocksize_bits + 2));
     788                 :            : 
     789                 :          0 :         ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
     790                 :            :                 e4b->bd_bitmap, e4b->bd_group);
     791                 :          0 : }
     792                 :            : 
     793                 :            : /* The buddy information is attached to the buddy cache inode
     794                 :            :  * for convenience. The information regarding each group
     795                 :            :  * is loaded via ext4_mb_load_buddy. The information involves
     796                 :            :  * block bitmap and buddy information. The information is
     797                 :            :  * stored in the inode as
     798                 :            :  *
     799                 :            :  * {                        page                        }
     800                 :            :  * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
     801                 :            :  *
     802                 :            :  *
     803                 :            :  * one block each for bitmap and buddy information.
     804                 :            :  * So for each group we take up 2 blocks. A page can
     805                 :            :  * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize)  blocks.
     806                 :            :  * So it can have information regarding groups_per_page which
     807                 :            :  * is blocks_per_page/2
     808                 :            :  *
     809                 :            :  * Locking note:  This routine takes the block group lock of all groups
     810                 :            :  * for this page; do not hold this lock when calling this routine!
     811                 :            :  */
     812                 :            : 
     813                 :          0 : static int ext4_mb_init_cache(struct page *page, char *incore)
     814                 :            : {
                                                /*
                                                 * page:   buddy cache page to fill; the host inode of its
                                                 *         mapping supplies the super block and block size.
                                                 * incore: block bitmap used to generate a buddy block. It must
                                                 *         be NULL if the first block filled here is a bitmap
                                                 *         block and non-NULL if it is a buddy block (see the
                                                 *         BUG_ON()s in the fill loop).
                                                 *
                                                 * Returns 0 after marking the page uptodate, -ENOMEM if the
                                                 * buffer_head array or a bitmap read could not be obtained, or
                                                 * -EIO if waiting for a bitmap failed.
                                                 */
     815                 :            :         ext4_group_t ngroups;
     816                 :            :         int blocksize;
     817                 :            :         int blocks_per_page;
     818                 :            :         int groups_per_page;
     819                 :            :         int err = 0;
     820                 :            :         int i;
     821                 :            :         ext4_group_t first_group, group;
     822                 :            :         int first_block;
     823                 :            :         struct super_block *sb;
     824                 :            :         struct buffer_head *bhs;
     825                 :            :         struct buffer_head **bh = NULL;
     826                 :            :         struct inode *inode;
     827                 :            :         char *data;
     828                 :            :         char *bitmap;
     829                 :            :         struct ext4_group_info *grinfo;
     830                 :            : 
     831                 :            :         mb_debug(1, "init page %lu\n", page->index);
     832                 :            : 
     833                 :        262 :         inode = page->mapping->host;
     834                 :        262 :         sb = inode->i_sb;
     835                 :            :         ngroups = ext4_get_groups_count(sb);
     836                 :        262 :         blocksize = 1 << inode->i_blkbits;
     837                 :        262 :         blocks_per_page = PAGE_CACHE_SIZE / blocksize;
     838                 :            : 
                                                /*
                                                 * Two blocks (bitmap + buddy) per group. When the page holds a
                                                 * single block the shift yields 0, so clamp to one group.
                                                 */
     839                 :        262 :         groups_per_page = blocks_per_page >> 1;
     840         [ +  - ]:        262 :         if (groups_per_page == 0)
     841                 :            :                 groups_per_page = 1;
     842                 :            : 
     843                 :            :         /* allocate buffer_heads to read bitmaps */
     844         [ +  - ]:        262 :         if (groups_per_page > 1) {
     845                 :          0 :                 i = sizeof(struct buffer_head *) * groups_per_page;
     846                 :            :                 bh = kzalloc(i, GFP_NOFS);
     847         [ +  - ]:        262 :                 if (bh == NULL) {
     848                 :            :                         err = -ENOMEM;
     849                 :            :                         goto out;
     850                 :            :                 }
     851                 :            :         } else
                                                        /* a single group: use the on-stack slot instead */
     852                 :            :                 bh = &bhs;
     853                 :            : 
     854                 :        262 :         first_group = page->index * blocks_per_page / 2;
     855                 :            : 
     856                 :            :         /* read all groups the page covers into the cache */
     857         [ +  + ]:        524 :         for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
     858         [ +  - ]:        262 :                 if (group >= ngroups)
     859                 :            :                         break;
     860                 :            : 
     861                 :            :                 grinfo = ext4_get_group_info(sb, group);
     862                 :            :                 /*
     863                 :            :                  * If page is uptodate then we came here after online resize
     864                 :            :                  * which added some new uninitialized group info structs, so
     865                 :            :                  * we must skip all initialized uptodate buddies on the page,
     866                 :            :                  * which may be currently in use by an allocating task.
     867                 :            :                  */
     868 [ -  + ][ #  # ]:        262 :                 if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
     869                 :          0 :                         bh[i] = NULL;
     870                 :          0 :                         continue;
     871                 :            :                 }
     872         [ +  - ]:        262 :                 if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
     873                 :            :                         err = -ENOMEM;
     874                 :            :                         goto out;
     875                 :            :                 }
     876                 :            :                 mb_debug(1, "read bitmap for group %u\n", group);
     877                 :            :         }
     878                 :            : 
     879                 :            :         /* wait for I/O completion */
     880         [ +  + ]:        524 :         for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
     881 [ +  - ][ +  - ]:        262 :                 if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
     882                 :            :                         err = -EIO;
     883                 :            :                         goto out;
     884                 :            :                 }
     885                 :            :         }
     886                 :            : 
                                                /*
                                                 * Fill every block of the page. Block numbers are relative to
                                                 * the buddy cache inode: even ones hold a group's bitmap, odd
                                                 * ones its buddy, and block >> 1 is the group number.
                                                 */
     887                 :        262 :         first_block = page->index * blocks_per_page;
     888         [ +  + ]:        524 :         for (i = 0; i < blocks_per_page; i++) {
     889                 :        262 :                 group = (first_block + i) >> 1;
     890         [ +  - ]:        262 :                 if (group >= ngroups)
     891                 :            :                         break;
     892                 :            : 
     893         [ -  + ]:        262 :                 if (!bh[group - first_group])
     894                 :            :                         /* skip initialized uptodate buddy */
     895                 :          0 :                         continue;
     896                 :            : 
     897                 :            :                 /*
     898                 :            :                  * data carry information regarding this
     899                 :            :                  * particular group in the format specified
     900                 :            :                  * above
     901                 :            :                  *
     902                 :            :                  */
     903                 :        262 :                 data = page_address(page) + (i * blocksize);
     904                 :        262 :                 bitmap = bh[group - first_group]->b_data;
     905                 :            : 
     906                 :            :                 /*
     907                 :            :                  * We place the buddy block and bitmap block
     908                 :            :                  * close together
     909                 :            :                  */
     910         [ +  + ]:        262 :                 if ((first_block + i) & 1) {
     911                 :            :                         /* this is block of buddy */
     912         [ -  + ]:        131 :                         BUG_ON(incore == NULL);
     913                 :            :                         mb_debug(1, "put buddy for group %u in page %lu/%x\n",
     914                 :            :                                 group, page->index, i * blocksize);
     915                 :            :                         trace_ext4_mb_buddy_bitmap_load(sb, group);
     916                 :            :                         grinfo = ext4_get_group_info(sb, group);
     917                 :        131 :                         grinfo->bb_fragments = 0;
     918         [ +  - ]:        131 :                         memset(grinfo->bb_counters, 0,
     919                 :            :                                sizeof(*grinfo->bb_counters) *
     920                 :            :                                 (sb->s_blocksize_bits+2));
     921                 :            :                         /*
     922                 :            :                          * incore points at the group block bitmap: it was set
                                                                 * by the bitmap branch below while handling the
                                                                 * previous block, or was passed in by the caller
     923                 :            :                          */
     924                 :            :                         ext4_lock_group(sb, group);
     925                 :            :                         /* init the buddy */
     926         [ +  - ]:        131 :                         memset(data, 0xff, blocksize);
     927                 :        131 :                         ext4_mb_generate_buddy(sb, data, incore, group);
     928                 :            :                         ext4_unlock_group(sb, group);
     929                 :            :                         incore = NULL;
     930                 :            :                 } else {
     931                 :            :                         /* this is block of bitmap */
     932         [ -  + ]:        131 :                         BUG_ON(incore != NULL);
     933                 :            :                         mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
     934                 :            :                                 group, page->index, i * blocksize);
     935                 :            :                         trace_ext4_mb_bitmap_load(sb, group);
     936                 :            : 
     937                 :            :                         /* see comments in ext4_mb_put_pa() */
     938                 :            :                         ext4_lock_group(sb, group);
     939                 :        131 :                         memcpy(data, bitmap, blocksize);
     940                 :            : 
     941                 :            :                         /* mark all preallocated blks used in in-core bitmap */
     942                 :        131 :                         ext4_mb_generate_from_pa(sb, data, group);
     943                 :        131 :                         ext4_mb_generate_from_freelist(sb, data, group);
     944                 :            :                         ext4_unlock_group(sb, group);
     945                 :            : 
     946                 :            :                         /* set incore so that the buddy information can be
     947                 :            :                          * generated using this
     948                 :            :                          */
     949                 :            :                         incore = data;
     950                 :            :                 }
     951                 :            :         }
     952                 :            :         SetPageUptodate(page);
     953                 :            : 
     954                 :            : out:
                                                /*
                                                 * Every slot is handed to brelse(), including the ones that
                                                 * were set to NULL above; bh itself is NULL only when the
                                                 * kzalloc() failed.
                                                 * NOTE(review): bhs has no initializer, so with bh == &bhs the
                                                 * wait loop and this loop rely on the read loop having stored
                                                 * into bh[0] (i.e. first_group < ngroups) -- confirm that
                                                 * callers guarantee this.
                                                 */
     955         [ +  - ]:        262 :         if (bh) {
     956         [ +  + ]:        524 :                 for (i = 0; i < groups_per_page; i++)
     957                 :        262 :                         brelse(bh[i]);
     958         [ -  + ]:        262 :                 if (bh != &bhs)
     959                 :          0 :                         kfree(bh);
     960                 :            :         }
     961                 :        262 :         return err;
     962                 :            : }
     963                 :            : 
     964                 :            : /*
     965                 :            :  * Lock the buddy and bitmap pages. This makes sure that a parallel init_group
     966                 :            :  * on the same buddy page doesn't happen while holding the buddy page lock.
     967                 :            :  * Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap
     968                 :            :  * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
                                         *
                                         * Returns -EIO when find_or_create_page() yields no page. If only the
                                         * second lookup fails, e4b->bd_bitmap_page remains set and is not
                                         * released here.
     969                 :            :  */
     970                 :          0 : static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
     971                 :            :                 ext4_group_t group, struct ext4_buddy *e4b)
     972                 :            : {
     973                 :         97 :         struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
     974                 :            :         int block, pnum, poff;
     975                 :            :         int blocks_per_page;
     976                 :            :         struct page *page;
     977                 :            : 
     978                 :         97 :         e4b->bd_buddy_page = NULL;
     979                 :         97 :         e4b->bd_bitmap_page = NULL;
     980                 :            : 
     981                 :         97 :         blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
     982                 :            :         /*
     983                 :            :          * the buddy cache inode stores the block bitmap
     984                 :            :          * and buddy information in consecutive blocks.
     985                 :            :          * So for each group we need two blocks.
     986                 :            :          */
     987                 :         97 :         block = group * 2;
     988                 :         97 :         pnum = block / blocks_per_page;
     989                 :         97 :         poff = block % blocks_per_page;
     990                 :         97 :         page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
     991         [ +  - ]:         97 :         if (!page)
     992                 :            :                 return -EIO;
     993         [ -  + ]:         97 :         BUG_ON(page->mapping != inode->i_mapping);
     994                 :         97 :         e4b->bd_bitmap_page = page;
     995                 :         97 :         e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
     996                 :            : 
     997         [ +  - ]:         97 :         if (blocks_per_page >= 2) {
     998                 :            :                 /* buddy and bitmap are on the same page */
     999                 :            :                 return 0;
    1000                 :            :         }
    1001                 :            : 
                                                /*
                                                 * Fewer than two blocks per page: the buddy block (block + 1)
                                                 * sits on a different page, which is looked up separately.
                                                 */
    1002                 :         97 :         block++;
    1003                 :         97 :         pnum = block / blocks_per_page;
    1004                 :         97 :         page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
    1005            [ + ]:         97 :         if (!page)
    1006                 :            :                 return -EIO;
    1007         [ -  + ]:        194 :         BUG_ON(page->mapping != inode->i_mapping);
    1008                 :         97 :         e4b->bd_buddy_page = page;
    1009                 :         97 :         return 0;
    1010                 :            : }
    1011                 :            : 
                                        /*
                                         * Unlock and drop the reference on whichever of e4b->bd_bitmap_page and
                                         * e4b->bd_buddy_page is non-NULL. The pointers themselves are left as
                                         * they are.
                                         */
    1012                 :         97 : static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
    1013                 :            : {
    1014         [ +  - ]:         97 :         if (e4b->bd_bitmap_page) {
    1015                 :         97 :                 unlock_page(e4b->bd_bitmap_page);
    1016                 :         97 :                 page_cache_release(e4b->bd_bitmap_page);
    1017                 :            :         }
    1018         [ +  - ]:         97 :         if (e4b->bd_buddy_page) {
    1019                 :         97 :                 unlock_page(e4b->bd_buddy_page);
    1020                 :         97 :                 page_cache_release(e4b->bd_buddy_page);
    1021                 :            :         }
    1022                 :         97 : }
    1023                 :            : 
    1024                 :            : /*
    1025                 :            :  * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
    1026                 :            :  * block group lock of all groups for this page; do not hold the BG lock when
    1027                 :            :  * calling this routine!
                                         *
                                         * Initializes the buddy cache page(s) of the given group through
                                         * ext4_mb_init_cache() while holding the page locks obtained from
                                         * ext4_mb_get_buddy_page_lock(). Returns 0 on success or when the group
                                         * no longer needs initialization; otherwise the error reported by the
                                         * helpers, or -EIO if a page is not uptodate after being initialized.
    1028                 :            :  */
    1029                 :            : static noinline_for_stack
    1030                 :          0 : int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
    1031                 :            : {
    1032                 :            : 
    1033                 :            :         struct ext4_group_info *this_grp;
    1034                 :            :         struct ext4_buddy e4b;
    1035                 :            :         struct page *page;
    1036                 :            :         int ret = 0;
    1037                 :            : 
    1038                 :            :         might_sleep();
    1039                 :            :         mb_debug(1, "init group %u\n", group);
    1040                 :            :         this_grp = ext4_get_group_info(sb, group);
    1041                 :            :         /*
    1042                 :            :          * This ensures that we don't reinit the buddy cache
    1043                 :            :          * page which map to the group from which we are already
    1044                 :            :          * allocating. If we are looking at the buddy cache we would
    1045                 :            :          * have taken a reference using ext4_mb_load_buddy and that
    1046                 :            :          * would have pinned buddy page to page cache.
    1047                 :            :          */
    1048                 :         97 :         ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
    1049 [ +  - ][ +  - ]:         97 :         if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
    1050                 :            :                 /*
    1051                 :            :                  * taking the page locks failed, or somebody already
    1052                 :            :                  * initialized the group: return without doing anything
    1053                 :            :                  */
    1054                 :            :                 goto err;
    1055                 :            :         }
    1056                 :            : 
    1057                 :         97 :         page = e4b.bd_bitmap_page;
    1058                 :         97 :         ret = ext4_mb_init_cache(page, NULL);
    1059         [ +  - ]:         97 :         if (ret)
    1060                 :            :                 goto err;
    1061         [ +  - ]:         97 :         if (!PageUptodate(page)) {
    1062                 :            :                 ret = -EIO;
    1063                 :            :                 goto err;
    1064                 :            :         }
    1065                 :         97 :         mark_page_accessed(page);
    1066                 :            : 
    1067         [ +  - ]:         97 :         if (e4b.bd_buddy_page == NULL) {
    1068                 :            :                 /*
    1069                 :            :                  * If both the bitmap and buddy are in
    1070                 :            :                  * the same page we don't need to force
    1071                 :            :                  * init the buddy
    1072                 :            :                  */
    1073                 :            :                 ret = 0;
    1074                 :            :                 goto err;
    1075                 :            :         }
    1076                 :            :         /* init buddy cache */
    1077                 :            :         page = e4b.bd_buddy_page;
    1078                 :         97 :         ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
    1079         [ +  - ]:         97 :         if (ret)
    1080                 :            :                 goto err;
    1081         [ +  - ]:         97 :         if (!PageUptodate(page)) {
    1082                 :            :                 ret = -EIO;
    1083                 :            :                 goto err;
    1084                 :            :         }
    1085                 :         97 :         mark_page_accessed(page);
    1086                 :            : err:
                                                /* common exit, reached on success as well as on failure */
    1087                 :         97 :         ext4_mb_put_buddy_page_lock(&e4b);
    1088                 :         97 :         return ret;
    1089                 :            : }
    1090                 :            : 
    1091                 :            : /*
    1092                 :            :  * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
    1093                 :            :  * block group lock of all groups for this page; do not hold the BG lock when
    1094                 :            :  * calling this routine!
                                         *
                                         * Fills in *e4b for the given group: the group info, the page and address
                                         * of the block bitmap, and the page and address of the buddy. Pages that
                                         * are missing or not uptodate are (re)created and initialized with
                                         * ext4_mb_init_cache().
                                         *
                                         * Returns 0 with one reference held on each of e4b->bd_bitmap_page and
                                         * e4b->bd_buddy_page (see ext4_mb_unload_buddy()). On failure the
                                         * references taken so far are dropped, e4b->bd_bitmap and e4b->bd_buddy
                                         * are set to NULL and the error is returned.
    1095                 :            :  */
    1096                 :            : static noinline_for_stack int
    1097                 :          0 : ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
    1098                 :            :                                         struct ext4_buddy *e4b)
    1099                 :            : {
    1100                 :            :         int blocks_per_page;
    1101                 :            :         int block;
    1102                 :            :         int pnum;
    1103                 :            :         int poff;
    1104                 :            :         struct page *page;
    1105                 :            :         int ret;
    1106                 :            :         struct ext4_group_info *grp;
    1107                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    1108                 :     322771 :         struct inode *inode = sbi->s_buddy_cache;
    1109                 :            : 
    1110                 :            :         might_sleep();
    1111                 :            :         mb_debug(1, "load group %u\n", group);
    1112                 :            : 
    1113                 :     322771 :         blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
    1114                 :            :         grp = ext4_get_group_info(sb, group);
    1115                 :            : 
    1116                 :     322771 :         e4b->bd_blkbits = sb->s_blocksize_bits;
    1117                 :     322771 :         e4b->bd_info = grp;
    1118                 :     322771 :         e4b->bd_sb = sb;
    1119                 :     322771 :         e4b->bd_group = group;
    1120                 :     322771 :         e4b->bd_buddy_page = NULL;
    1121                 :     322771 :         e4b->bd_bitmap_page = NULL;
    1122                 :            : 
    1123         [ +  + ]:     322771 :         if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
    1124                 :            :                 /*
    1125                 :            :                  * we need full data about the group
    1126                 :            :                  * to make a good selection
    1127                 :            :                  */
    1128                 :         58 :                 ret = ext4_mb_init_group(sb, group);
    1129         [ +  - ]:         58 :                 if (ret)
    1130                 :            :                         return ret;
    1131                 :            :         }
    1132                 :            : 
    1133                 :            :         /*
    1134                 :            :          * the buddy cache inode stores the block bitmap
    1135                 :            :          * and buddy information in consecutive blocks.
    1136                 :            :          * So for each group we need two blocks.
    1137                 :            :          */
    1138                 :     322771 :         block = group * 2;
    1139                 :     322771 :         pnum = block / blocks_per_page;
    1140                 :     322771 :         poff = block % blocks_per_page;
    1141                 :            : 
    1142                 :            :         /* we could use find_or_create_page(), but it locks page
    1143                 :            :          * what we'd like to avoid in fast path ... */
    1144                 :     322771 :         page = find_get_page(inode->i_mapping, pnum);
    1145 [ +  + ][ +  + ]:     968337 :         if (page == NULL || !PageUptodate(page)) {
    1146         [ +  + ]:         45 :                 if (page)
    1147                 :            :                         /*
    1148                 :            :                          * drop the page reference and try
    1149                 :            :                          * to get the page with lock. If we
    1150                 :            :                          * are not uptodate that implies
    1151                 :            :                          * somebody just created the page but
    1152                 :            :                          * is yet to initialize the same. So
    1153                 :            :                          * wait for it to initialize.
    1154                 :            :                          */
    1155                 :         11 :                         page_cache_release(page);
    1156                 :         45 :                 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
    1157         [ +  - ]:         45 :                 if (page) {
    1158         [ -  + ]:         45 :                         BUG_ON(page->mapping != inode->i_mapping);
    1159         [ +  + ]:         45 :                         if (!PageUptodate(page)) {
    1160                 :         34 :                                 ret = ext4_mb_init_cache(page, NULL);
    1161         [ -  + ]:         34 :                                 if (ret) {
    1162                 :          0 :                                         unlock_page(page);
    1163                 :          0 :                                         goto err;
    1164                 :            :                                 }
    1165                 :         34 :                                 mb_cmp_bitmaps(e4b, page_address(page) +
    1166                 :            :                                                (poff * sb->s_blocksize));
    1167                 :            :                         }
    1168                 :         45 :                         unlock_page(page);
    1169                 :            :                 }
    1170                 :            :         }
    1171 [ +  + ][ +  - ]:     968370 :         if (page == NULL || !PageUptodate(page)) {
    1172                 :            :                 ret = -EIO;
    1173                 :            :                 goto err;
    1174                 :            :         }
    1175                 :     322799 :         e4b->bd_bitmap_page = page;
    1176                 :     322799 :         e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
    1177                 :     322785 :         mark_page_accessed(page);
    1178                 :            : 
                                                /* same procedure for the buddy block, which is block + 1 */
    1179                 :     322792 :         block++;
    1180                 :     322792 :         pnum = block / blocks_per_page;
    1181                 :     322792 :         poff = block % blocks_per_page;
    1182                 :            : 
    1183                 :     322792 :         page = find_get_page(inode->i_mapping, pnum);
    1184 [ +  + ][ +  + ]:     645560 :         if (page == NULL || !PageUptodate(page)) {
    1185         [ +  + ]:         39 :                 if (page)
    1186                 :          5 :                         page_cache_release(page);
    1187                 :         39 :                 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
    1188         [ +  - ]:         39 :                 if (page) {
    1189         [ -  + ]:         39 :                         BUG_ON(page->mapping != inode->i_mapping);
    1190         [ +  + ]:         39 :                         if (!PageUptodate(page)) {
    1191                 :         34 :                                 ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
    1192         [ -  + ]:         34 :                                 if (ret) {
    1193                 :          0 :                                         unlock_page(page);
    1194                 :          0 :                                         goto err;
    1195                 :            :                                 }
    1196                 :            :                         }
    1197                 :         39 :                         unlock_page(page);
    1198                 :            :                 }
    1199                 :            :         }
    1200 [ +  - ][ +  - ]:     645595 :         if (page == NULL || !PageUptodate(page)) {
    1201                 :            :                 ret = -EIO;
    1202                 :            :                 goto err;
    1203                 :            :         }
    1204                 :     322798 :         e4b->bd_buddy_page = page;
    1205                 :     322798 :         e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
    1206                 :     322800 :         mark_page_accessed(page);
    1207                 :            : 
    1208         [ -  + ]:     322791 :         BUG_ON(e4b->bd_bitmap_page == NULL);
    1209         [ -  + ]:     322791 :         BUG_ON(e4b->bd_buddy_page == NULL);
    1210                 :            : 
    1211                 :            :         return 0;
    1212                 :            : 
    1213                 :            : err:
                                                /*
                                                 * page is the page being worked on when the failure occurred
                                                 * (it may be NULL); the e4b pages are the ones already stored.
                                                 * NOTE(review): bd_bitmap_page/bd_buddy_page are released but
                                                 * not reset to NULL here, so passing e4b to
                                                 * ext4_mb_unload_buddy() after a failure would release them
                                                 * again -- confirm that no caller does this.
                                                 */
    1214         [ #  # ]:     322771 :         if (page)
    1215                 :          0 :                 page_cache_release(page);
    1216         [ #  # ]:          0 :         if (e4b->bd_bitmap_page)
    1217                 :          0 :                 page_cache_release(e4b->bd_bitmap_page);
    1218         [ #  # ]:          0 :         if (e4b->bd_buddy_page)
    1219                 :          0 :                 page_cache_release(e4b->bd_buddy_page);
    1220                 :          0 :         e4b->bd_buddy = NULL;
    1221                 :          0 :         e4b->bd_bitmap = NULL;
    1222                 :          0 :         return ret;
    1223                 :            : }
    1224                 :            : 
                                        /*
                                         * Drop the page references held in e4b->bd_bitmap_page and
                                         * e4b->bd_buddy_page, each only if non-NULL. The pointers are not
                                         * cleared afterwards.
                                         */
    1225                 :     322796 : static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
    1226                 :            : {
    1227         [ +  - ]:     322796 :         if (e4b->bd_bitmap_page)
    1228                 :     322796 :                 page_cache_release(e4b->bd_bitmap_page);
    1229         [ +  - ]:     322797 :         if (e4b->bd_buddy_page)
    1230                 :     322797 :                 page_cache_release(e4b->bd_buddy_page);
    1231                 :     322795 : }
    1232                 :            : 
    1233                 :            : 
                                        /*
                                         * Return the order of the buddy chunk that contains the given block,
                                         * judging by the buddy bitmaps at e4b->bd_buddy, or 0 if no order from 1
                                         * to bd_blkbits + 1 has a clear bit for it.
                                         *
                                         * The bit index is halved before each test. After testing order N the
                                         * cursor moves forward by 1 << (bd_blkbits - N) bytes to reach the
                                         * bitmap of order N + 1.
                                         *
                                         * block must be below 1 << (bd_blkbits + 3), and bd_bitmap must differ
                                         * from bd_buddy; both are enforced with BUG_ON().
                                         *
                                         * NOTE(review): on the last iteration (order == bd_blkbits + 1) the
                                         * cursor advance evaluates 1 << -1. The result is never used, but the
                                         * shift count itself is negative -- confirm this is acceptable.
                                         */
    1234                 :          0 : static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
    1235                 :            : {
    1236                 :            :         int order = 1;
    1237                 :            :         void *bb;
    1238                 :            : 
    1239         [ -  + ]:    1477157 :         BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
    1240         [ +  - ]:    1477157 :         BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
    1241                 :            : 
    1242                 :            :         bb = e4b->bd_buddy;
    1243         [ +  + ]:   10675290 :         while (order <= e4b->bd_blkbits + 1) {
    1244                 :   10174968 :                 block = block >> 1;
    1245         [ +  + ]:   10174968 :                 if (!mb_test_bit(block, bb)) {
    1246                 :            :                         /* this block is part of buddy of order 'order' */
    1247                 :            :                         return order;
    1248                 :            :                 }
    1249                 :    9198133 :                 bb += 1 << (e4b->bd_blkbits - order);
    1250                 :    9198133 :                 order++;
    1251                 :            :         }
    1252                 :            :         return 0;
    1253                 :            : }
    1254                 :            : 
    1255                 :          0 : static void mb_clear_bits(void *bm, int cur, int len)
    1256                 :            : {
    1257                 :            :         __u32 *addr;
    1258                 :            : 
    1259                 :     127698 :         len = cur + len;
    1260         [ +  + ]:     345201 :         while (cur < len) {
    1261 [ +  + ][ +  + ]:     217503 :                 if ((cur & 31) == 0 && (len - cur) >= 32) {
    1262                 :            :                         /* fast path: clear whole word at once */
    1263                 :      16320 :                         addr = bm + (cur >> 3);
    1264                 :      16320 :                         *addr = 0;
    1265                 :      16320 :                         cur += 32;
    1266                 :      16320 :                         continue;
    1267                 :            :                 }
    1268                 :            :                 mb_clear_bit(cur, bm);
    1269                 :     217503 :                 cur++;
    1270                 :            :         }
    1271                 :     127698 : }
    1272                 :            : 
    1273                 :            : /* clear bits in given range
    1274                 :            :  * will return first found zero bit if any, -1 otherwise
    1275                 :            :  */
    1276                 :          0 : static int mb_test_and_clear_bits(void *bm, int cur, int len)
    1277                 :            : {
    1278                 :            :         __u32 *addr;
    1279                 :            :         int zero_bit = -1;
    1280                 :            : 
    1281                 :     153551 :         len = cur + len;
    1282         [ +  + ]:     917624 :         while (cur < len) {
    1283 [ +  + ][ +  + ]:     764121 :                 if ((cur & 31) == 0 && (len - cur) >= 32) {
    1284                 :            :                         /* fast path: clear whole word at once */
    1285                 :      34169 :                         addr = bm + (cur >> 3);
    1286    [ - ][ #  # ]:      34169 :                         if (*addr != (__u32)(-1) && zero_bit == -1)
    1287                 :          0 :                                 zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
    1288                 :      34121 :                         *addr = 0;
    1289                 :      34121 :                         cur += 32;
    1290                 :      34121 :                         continue;
    1291                 :            :                 }
    1292 [ -  + ][ #  # ]:     729952 :                 if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
    1293                 :            :                         zero_bit = cur;
    1294                 :     764073 :                 cur++;
    1295                 :            :         }
    1296                 :            : 
    1297                 :     153503 :         return zero_bit;
    1298                 :            : }
    1299                 :            : 
    1300                 :          0 : void ext4_set_bits(void *bm, int cur, int len)
    1301                 :            : {
    1302                 :            :         __u32 *addr;
    1303                 :            : 
    1304                 :     306301 :         len = cur + len;
    1305         [ +  + ]:    1163456 :         while (cur < len) {
    1306 [ +  + ][ +  + ]:     857155 :                 if ((cur & 31) == 0 && (len - cur) >= 32) {
    1307                 :            :                         /* fast path: set whole word at once */
    1308                 :      61433 :                         addr = bm + (cur >> 3);
    1309                 :      61433 :                         *addr = 0xffffffff;
    1310                 :      61433 :                         cur += 32;
    1311                 :      61433 :                         continue;
    1312                 :            :                 }
    1313                 :            :                 mb_set_bit(cur, bm);
    1314                 :     857155 :                 cur++;
    1315                 :            :         }
    1316                 :     306301 : }
    1317                 :            : 
    1318                 :            : /*
    1319                 :            :  * _________________________________________________________________ */
    1320                 :            : 
    1321                 :            : static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
    1322                 :            : {
    1323 [ +  + ][ +  + ]:     373488 :         if (mb_test_bit(*bit + side, bitmap)) {
    1324                 :            :                 mb_clear_bit(*bit, bitmap);
    1325                 :     200512 :                 (*bit) -= side;
    1326                 :            :                 return 1;
    1327                 :            :         }
    1328                 :            :         else {
    1329                 :            :                 (*bit) += side;
    1330                 :            :                 mb_set_bit(*bit, bitmap);
    1331                 :            :                 return -1;
    1332                 :            :         }
    1333                 :            : }
    1334                 :            : 
    1335                 :          0 : static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
    1336                 :            : {
    1337                 :            :         int max;
    1338                 :            :         int order = 1;
    1339                 :     131209 :         void *buddy = mb_find_buddy(e4b, order, &max);
    1340                 :            : 
    1341         [ +  + ]:     378921 :         while (buddy) {
    1342                 :            :                 void *buddy2;
    1343                 :            : 
    1344                 :            :                 /* Bits in range [first; last] are known to be set since
    1345                 :            :                  * corresponding blocks were allocated. Bits in range
    1346                 :            :                  * (first; last) will stay set because they form buddies on
    1347                 :            :                  * upper layer. We just deal with borders if they don't
    1348                 :            :                  * align with upper layer and then go up.
    1349                 :            :                  * Releasing entire group is all about clearing
    1350                 :            :                  * single bit of highest order buddy.
    1351                 :            :                  */
    1352                 :            : 
    1353                 :            :                 /* Example:
    1354                 :            :                  * ---------------------------------
    1355                 :            :                  * |   1   |   1   |   1   |   1   |
    1356                 :            :                  * ---------------------------------
    1357                 :            :                  * | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
    1358                 :            :                  * ---------------------------------
    1359                 :            :                  *   0   1   2   3   4   5   6   7
    1360                 :            :                  *      \_____________________/
    1361                 :            :                  *
    1362                 :            :                  * Neither [1] nor [6] is aligned to above layer.
    1363                 :            :                  * Left neighbour [0] is free, so mark it busy,
    1364                 :            :                  * decrease bb_counters and extend range to
    1365                 :            :                  * [0; 6]
    1366                 :            :                  * Right neighbour [7] is busy. It can't be coalesced with [6], so
    1367                 :            :                  * mark [6] free, increase bb_counters and shrink range to
    1368                 :            :                  * [0; 5].
    1369                 :            :                  * Then shift range to [0; 2], go up and do the same.
    1370                 :            :                  */
    1371                 :            : 
    1372                 :            : 
    1373         [ +  + ]:     378919 :                 if (first & 1)
    1374                 :     186984 :                         e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
    1375         [ +  + ]:     378919 :                 if (!(last & 1))
    1376                 :     186504 :                         e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
    1377            [ + ]:     378919 :                 if (first > last)
    1378                 :            :                         break;
    1379                 :     247809 :                 order++;
    1380                 :            : 
    1381    [ + ][ +  + ]:     247809 :                 if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
    1382                 :         99 :                         mb_clear_bits(buddy, first, last - first + 1);
    1383                 :        102 :                         e4b->bd_info->bb_counters[order - 1] += last - first + 1;
    1384                 :        102 :                         break;
    1385                 :            :                 }
    1386                 :     247713 :                 first >>= 1;
    1387                 :     247713 :                 last >>= 1;
    1388                 :            :                 buddy = buddy2;
    1389                 :            :         }
    1390                 :          5 : }
    1391                 :            : 
    1392                 :          0 : static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
    1393                 :            :                            int first, int count)
    1394                 :            : {
    1395                 :            :         int left_is_free = 0;
    1396                 :            :         int right_is_free = 0;
    1397                 :            :         int block;
    1398                 :     153511 :         int last = first + count - 1;
    1399                 :     307014 :         struct super_block *sb = e4b->bd_sb;
    1400                 :            : 
    1401         [ -  + ]:     153511 :         BUG_ON(last >= (sb->s_blocksize << 3));
    1402         [ -  + ]:     153511 :         assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
    1403                 :            :         /* Don't bother if the block group is corrupt. */
    1404         [ +  + ]:     153511 :         if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
    1405                 :     153505 :                 return;
    1406                 :            : 
    1407                 :            :         mb_check_buddy(e4b);
    1408                 :            :         mb_free_blocks_double(inode, e4b, first, count);
    1409                 :            : 
    1410                 :     153509 :         e4b->bd_info->bb_free += count;
    1411         [ +  + ]:     153509 :         if (first < e4b->bd_info->bb_first_free)
    1412                 :       1079 :                 e4b->bd_info->bb_first_free = first;
    1413                 :            : 
    1414                 :            :         /* access memory sequentially: check left neighbour,
    1415                 :            :          * clear range and then check right neighbour
    1416                 :            :          */
    1417         [ +  + ]:     153509 :         if (first != 0)
    1418                 :     153046 :                 left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
    1419                 :     153509 :         block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
    1420         [ +  + ]:     153503 :         if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
    1421                 :     153469 :                 right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
    1422                 :            : 
    1423         [ -  + ]:     153503 :         if (unlikely(block != -1)) {
    1424                 :            :                 ext4_fsblk_t blocknr;
    1425                 :            : 
    1426                 :          0 :                 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
    1427                 :          0 :                 blocknr += EXT4_C2B(EXT4_SB(sb), block);
    1428         [ #  # ]:          0 :                 ext4_grp_locked_error(sb, e4b->bd_group,
    1429                 :            :                                       inode ? inode->i_ino : 0,
    1430                 :            :                                       blocknr,
    1431                 :            :                                       "freeing already freed block "
    1432                 :            :                                       "(bit %u); block bitmap corrupt.",
    1433                 :            :                                       block);
    1434                 :            :                 /* Mark the block group as corrupt. */
    1435                 :          0 :                 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
    1436                 :            :                         &e4b->bd_info->bb_state);
    1437                 :          0 :                 mb_regenerate_buddy(e4b);
    1438                 :          0 :                 goto done;
    1439                 :            :         }
    1440                 :            : 
    1441                 :            :         /* let's maintain fragments counter */
    1442         [ +  + ]:     153503 :         if (left_is_free && right_is_free)
    1443                 :      62327 :                 e4b->bd_info->bb_fragments--;
    1444         [ +  + ]:      91176 :         else if (!left_is_free && !right_is_free)
    1445                 :      66038 :                 e4b->bd_info->bb_fragments++;
    1446                 :            : 
    1447                 :            :         /* buddy[0] == bd_bitmap is a special case, so handle
    1448                 :            :          * it right away and let mb_buddy_mark_free stay free of
    1449                 :            :          * zero order checks.
    1450                 :            :          * Check if neighbours are to be coalesced,
    1451                 :            :          * adjust bitmap bb_counters and borders appropriately.
    1452                 :            :          */
    1453         [ +  + ]:     153503 :         if (first & 1) {
    1454                 :      74900 :                 first += !left_is_free;
    1455         [ +  + ]:      74900 :                 e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
    1456                 :            :         }
    1457         [ +  + ]:     153503 :         if (!(last & 1)) {
    1458                 :      75396 :                 last -= !right_is_free;
    1459         [ +  + ]:      75396 :                 e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
    1460                 :            :         }
    1461                 :            : 
    1462            [ + ]:     153503 :         if (first <= last)
    1463                 :     131207 :                 mb_buddy_mark_free(e4b, first >> 1, last >> 1);
    1464                 :            : 
    1465                 :            : done:
    1466                 :     153503 :         mb_set_largest_free_order(sb, e4b->bd_info);
    1467                 :            :         mb_check_buddy(e4b);
    1468                 :            : }
    1469                 :            : 
    1470                 :          0 : static int mb_find_extent(struct ext4_buddy *e4b, int block,
    1471                 :            :                                 int needed, struct ext4_free_extent *ex)
    1472                 :            : {
    1473                 :            :         int next = block;
    1474                 :            :         int max, order;
    1475                 :            :         void *buddy;
    1476                 :            : 
    1477         [ -  + ]:     767537 :         assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
    1478         [ -  + ]:     767537 :         BUG_ON(ex == NULL);
    1479                 :            : 
    1480                 :     767537 :         buddy = mb_find_buddy(e4b, 0, &max);
    1481         [ -  + ]:    1535072 :         BUG_ON(buddy == NULL);
    1482         [ -  + ]:     767535 :         BUG_ON(block >= max);
    1483         [ +  + ]:     767535 :         if (mb_test_bit(block, buddy)) {
    1484                 :       1561 :                 ex->fe_len = 0;
    1485                 :       1561 :                 ex->fe_start = 0;
    1486                 :       1561 :                 ex->fe_group = 0;
    1487                 :       1561 :                 return 0;
    1488                 :            :         }
    1489                 :            : 
    1490                 :            :         /* find actual order */
    1491                 :     765974 :         order = mb_find_order_for_block(e4b, block);
    1492                 :     765974 :         block = block >> order;
    1493                 :            : 
    1494                 :     765974 :         ex->fe_len = 1 << order;
    1495                 :     765974 :         ex->fe_start = block << order;
    1496                 :     765974 :         ex->fe_group = e4b->bd_group;
    1497                 :            : 
    1498                 :            :         /* calc difference from given start */
    1499                 :     765974 :         next = next - ex->fe_start;
    1500                 :     765974 :         ex->fe_len -= next;
    1501                 :     765974 :         ex->fe_start += next;
    1502                 :            : 
    1503   [ +  +  +  + ]:    2043889 :         while (needed > ex->fe_len &&
    1504                 :     831158 :                mb_find_buddy(e4b, order, &max)) {
    1505                 :            : 
    1506         [ +  + ]:     831156 :                 if (block + 1 >= max)
    1507                 :            :                         break;
    1508                 :            : 
    1509                 :     831151 :                 next = (block + 1) * (1 << order);
    1510         [ +  + ]:     831151 :                 if (mb_test_bit(next, e4b->bd_bitmap))
    1511                 :            :                         break;
    1512                 :            : 
    1513                 :     446757 :                 order = mb_find_order_for_block(e4b, next);
    1514                 :            : 
    1515                 :     446753 :                 block = next >> order;
    1516                 :     446753 :                 ex->fe_len += 1 << order;
    1517                 :            :         }
    1518                 :            : 
    1519         [ -  + ]:     765974 :         BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
    1520                 :            :         return ex->fe_len;
    1521                 :            : }
    1522                 :            : 
    1523                 :          0 : static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
    1524                 :            : {
    1525                 :            :         int ord;
    1526                 :            :         int mlen = 0;
    1527                 :      88756 :         int max = 0;
    1528                 :            :         int cur;
    1529                 :      88756 :         int start = ex->fe_start;
    1530                 :      88756 :         int len = ex->fe_len;
    1531                 :            :         unsigned ret = 0;
    1532                 :            :         int len0 = len;
    1533                 :            :         void *buddy;
    1534                 :            : 
    1535         [ -  + ]:      88756 :         BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
    1536         [ -  + ]:      88756 :         BUG_ON(e4b->bd_group != ex->fe_group);
    1537         [ -  + ]:      88756 :         assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
    1538                 :            :         mb_check_buddy(e4b);
    1539                 :            :         mb_mark_used_double(e4b, start, len);
    1540                 :            : 
    1541                 :      88756 :         e4b->bd_info->bb_free -= len;
    1542         [ +  + ]:      88756 :         if (e4b->bd_info->bb_first_free == start)
    1543                 :       4988 :                 e4b->bd_info->bb_first_free += len;
    1544                 :            : 
    1545                 :            :         /* let's maintain fragments counter */
    1546         [ +  + ]:      88756 :         if (start != 0)
    1547                 :      88295 :                 mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
    1548         [ +  + ]:      88756 :         if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
    1549                 :      88720 :                 max = !mb_test_bit(start + len, e4b->bd_bitmap);
    1550         [ +  + ]:      88756 :         if (mlen && max)
    1551                 :      88756 :                 e4b->bd_info->bb_fragments++;
    1552         [ +  + ]:      83228 :         else if (!mlen && !max)
    1553                 :       9130 :                 e4b->bd_info->bb_fragments--;
    1554                 :            : 
    1555                 :            :         /* let's maintain buddy itself */
    1556         [ +  + ]:     353206 :         while (len) {
    1557                 :     264453 :                 ord = mb_find_order_for_block(e4b, start);
    1558                 :            : 
    1559 [ +  + ][ +  + ]:     264456 :                 if (((start >> ord) << ord) == start && len >= (1 << ord)) {
    1560                 :            :                         /* the whole chunk may be allocated at once! */
    1561                 :            :                         mlen = 1 << ord;
    1562                 :     148071 :                         buddy = mb_find_buddy(e4b, ord, &max);
    1563         [ -  + ]:     236822 :                         BUG_ON((start >> ord) >= max);
    1564                 :            :                         mb_set_bit(start >> ord, buddy);
    1565                 :     148066 :                         e4b->bd_info->bb_counters[ord]--;
    1566                 :     148066 :                         start += mlen;
    1567                 :     148066 :                         len -= mlen;
    1568         [ -  + ]:     148066 :                         BUG_ON(len < 0);
    1569                 :     148066 :                         continue;
    1570                 :            :                 }
    1571                 :            : 
    1572                 :            :                 /* store for history */
    1573         [ +  + ]:     116385 :                 if (ret == 0)
    1574                 :      45213 :                         ret = len | (ord << 16);
    1575                 :            : 
    1576                 :            :                 /* we have to split large buddy */
    1577         [ -  + ]:     116385 :                 BUG_ON(ord <= 0);
    1578                 :     116385 :                 buddy = mb_find_buddy(e4b, ord, &max);
    1579                 :            :                 mb_set_bit(start >> ord, buddy);
    1580                 :     116385 :                 e4b->bd_info->bb_counters[ord]--;
    1581                 :            : 
    1582                 :     116385 :                 ord--;
    1583                 :     116385 :                 cur = (start >> ord) & ~1U;
    1584                 :     116385 :                 buddy = mb_find_buddy(e4b, ord, &max);
    1585                 :            :                 mb_clear_bit(cur, buddy);
    1586                 :     116384 :                 mb_clear_bit(cur + 1, buddy);
    1587                 :     116384 :                 e4b->bd_info->bb_counters[ord]++;
    1588                 :     264450 :                 e4b->bd_info->bb_counters[ord]++;
    1589                 :            :         }
    1590                 :      88753 :         mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
    1591                 :            : 
    1592                 :      88753 :         ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
    1593                 :            :         mb_check_buddy(e4b);
    1594                 :            : 
    1595                 :      88754 :         return ret;
    1596                 :            : }
    1597                 :            : 
    1598                 :            : /*
    1599                 :            :  * Must be called under group lock!
    1600                 :            :  */
    1601                 :          0 : static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
    1602                 :            :                                         struct ext4_buddy *e4b)
    1603                 :            : {
    1604                 :      88754 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    1605                 :            :         int ret;
    1606                 :            : 
    1607         [ -  + ]:      88754 :         BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
    1608         [ -  + ]:      88754 :         BUG_ON(ac->ac_status == AC_STATUS_FOUND);
    1609                 :            : 
    1610                 :      88754 :         ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
    1611                 :      88754 :         ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
    1612                 :      88754 :         ret = mb_mark_used(e4b, &ac->ac_b_ex);
    1613                 :            : 
    1614                 :            :         /* preallocation can change ac_b_ex, thus we store actually
    1615                 :            :          * allocated blocks for history */
    1616                 :      88754 :         ac->ac_f_ex = ac->ac_b_ex;
    1617                 :            : 
    1618                 :      88754 :         ac->ac_status = AC_STATUS_FOUND;
    1619                 :      88754 :         ac->ac_tail = ret & 0xffff;
    1620                 :      88754 :         ac->ac_buddy = ret >> 16;
    1621                 :            : 
    1622                 :            :         /*
    1623                 :            :          * take the page reference. We want the page to be pinned
    1624                 :            :          * so that we don't get a ext4_mb_init_cache_call for this
    1625                 :            :          * group until we update the bitmap. That would mean we
    1626                 :            :          * double allocate blocks. The reference is dropped
    1627                 :            :          * in ext4_mb_release_context
    1628                 :            :          */
    1629                 :      88754 :         ac->ac_bitmap_page = e4b->bd_bitmap_page;
    1630                 :            :         get_page(ac->ac_bitmap_page);
    1631                 :      88753 :         ac->ac_buddy_page = e4b->bd_buddy_page;
    1632                 :            :         get_page(ac->ac_buddy_page);
    1633                 :            :         /* store last allocated for subsequent stream allocation */
    1634         [ +  + ]:      88754 :         if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
    1635                 :            :                 spin_lock(&sbi->s_md_lock);
    1636                 :      25247 :                 sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
    1637                 :      25247 :                 sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
    1638                 :            :                 spin_unlock(&sbi->s_md_lock);
    1639                 :            :         }
    1640                 :      88754 : }
    1641                 :            : 
    1642                 :            : /*
    1643                 :            :  * regular allocator, for general purposes allocation
    1644                 :            :  */
    1645                 :            : 
    1646                 :          0 : static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
    1647                 :            :                                         struct ext4_buddy *e4b,
    1648                 :            :                                         int finish_group)
    1649                 :            : {
    1650                 :     692697 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    1651                 :            :         struct ext4_free_extent *bex = &ac->ac_b_ex;
    1652                 :            :         struct ext4_free_extent *gex = &ac->ac_g_ex;
    1653                 :            :         struct ext4_free_extent ex;
    1654                 :            :         int max;
    1655                 :            : 
    1656         [ +  + ]:     692697 :         if (ac->ac_status == AC_STATUS_FOUND)
    1657                 :     437174 :                 return;
    1658                 :            :         /*
    1659                 :            :          * We don't want to scan for a whole year
    1660                 :            :          */
    1661 [ +  + ][ +  - ]:     643577 :         if (ac->ac_found > sbi->s_mb_max_to_scan &&
    1662                 :         20 :                         !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
    1663                 :         20 :                 ac->ac_status = AC_STATUS_BREAK;
    1664                 :         20 :                 return;
    1665                 :            :         }
    1666                 :            : 
    1667                 :            :         /*
    1668                 :            :          * Haven't found good chunk so far, let's continue
    1669                 :            :          */
    1670         [ +  + ]:     643557 :         if (bex->fe_len < gex->fe_len)
    1671                 :            :                 return;
    1672                 :            : 
    1673 [ +  + ][ +  + ]:     294548 :         if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
    1674         [ +  - ]:      39025 :                         && bex->fe_group == e4b->bd_group) {
    1675                 :            :                 /* recheck chunk's availability - we don't know
    1676                 :            :                  * when it was found (within this lock-unlock
    1677                 :            :                  * period or not) */
    1678                 :      39025 :                 max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
    1679         [ +  - ]:      39025 :                 if (max >= gex->fe_len) {
    1680                 :     294548 :                         ext4_mb_use_best_found(ac, e4b);
    1681                 :      39025 :                         return;
    1682                 :            :                 }
    1683                 :            :         }
    1684                 :            : }
    1685                 :            : 
    1686                 :            : /*
    1687                 :            :  * The routine checks whether the found extent is good enough. If it
    1688                 :            :  * is, the extent is marked used and a flag is set in the context to
    1689                 :            :  * stop scanning. Otherwise, the extent is compared with the previously
    1690                 :            :  * found extent and, if the new one is better, it is stored in the
    1691                 :            :  * context. Later, the best found extent will be used if mballoc
    1692                 :            :  * can't find a good enough extent.
    1693                 :            :  *
    1694                 :            :  * FIXME: real allocation policy is to be designed yet!
    1695                 :            :  */
    1696                 :          0 : static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
    1697                 :            :                                         struct ext4_free_extent *ex,
    1698                 :            :                                         struct ext4_buddy *e4b)
    1699                 :            : {
    1700                 :            :         struct ext4_free_extent *bex = &ac->ac_b_ex;
    1701                 :            :         struct ext4_free_extent *gex = &ac->ac_g_ex;
    1702                 :            : 
    1703         [ -  + ]:     704011 :         BUG_ON(ex->fe_len <= 0);
    1704         [ -  + ]:     704011 :         BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
    1705         [ -  + ]:     704011 :         BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
    1706         [ -  + ]:     704011 :         BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
    1707                 :            : 
    1708                 :     704011 :         ac->ac_found++;
    1709                 :            : 
    1710                 :            :         /*
    1711                 :            :          * The special case - take what you catch first
    1712                 :            :          */
    1713         [ -  + ]:     704011 :         if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
    1714                 :          0 :                 *bex = *ex;
    1715                 :          0 :                 ext4_mb_use_best_found(ac, e4b);
    1716                 :          0 :                 return;
    1717                 :            :         }
    1718                 :            : 
    1719                 :            :         /*
    1720                 :            :          * Let's check whether the chunk is good enough
    1721                 :            :          */
    1722         [ +  + ]:     704011 :         if (ex->fe_len == gex->fe_len) {
    1723                 :      33726 :                 *bex = *ex;
    1724                 :      33726 :                 ext4_mb_use_best_found(ac, e4b);
    1725                 :      33726 :                 return;
    1726                 :            :         }
    1727                 :            : 
    1728                 :            :         /*
    1729                 :            :          * If this is first found extent, just store it in the context
    1730                 :            :          */
    1731         [ +  + ]:     670285 :         if (bex->fe_len == 0) {
    1732                 :      50350 :                 *bex = *ex;
    1733                 :      50350 :                 return;
    1734                 :            :         }
    1735                 :            : 
    1736                 :            :         /*
    1737                 :            :          * If new found extent is better, store it in the context
    1738                 :            :          */
    1739         [ +  + ]:     619935 :         if (bex->fe_len < gex->fe_len) {
    1740                 :            :                 /* if the request isn't satisfied, any found extent
    1741                 :            :                  * larger than previous best one is better */
    1742         [ +  + ]:     359421 :                 if (ex->fe_len > bex->fe_len)
    1743                 :      48200 :                         *bex = *ex;
    1744         [ +  + ]:     260514 :         } else if (ex->fe_len > gex->fe_len) {
    1745                 :            :                 /* if the request is satisfied, then we try to find
    1746                 :            :                  * an extent that still satisfies the request, but is
    1747                 :            :                  * smaller than the previous one */
    1748         [ +  + ]:     250770 :                 if (ex->fe_len < bex->fe_len)
    1749                 :      13160 :                         *bex = *ex;
    1750                 :            :         }
    1751                 :            : 
    1752                 :     619935 :         ext4_mb_check_limits(ac, e4b, 0);
    1753                 :            : }
    1754                 :            : 
    1755                 :            : static noinline_for_stack
    1756                 :          0 : int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
    1757                 :         10 :                                         struct ext4_buddy *e4b)
    1758                 :            : {
    1759                 :         10 :         struct ext4_free_extent ex = ac->ac_b_ex;
    1760                 :         10 :         ext4_group_t group = ex.fe_group;
    1761                 :            :         int max;
    1762                 :            :         int err;
    1763                 :            : 
    1764         [ -  + ]:         10 :         BUG_ON(ex.fe_len <= 0);
    1765                 :         10 :         err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
    1766         [ +  - ]:         10 :         if (err)
    1767                 :            :                 return err;
    1768                 :            : 
    1769                 :         10 :         ext4_lock_group(ac->ac_sb, group);
    1770                 :         10 :         max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
    1771                 :            : 
    1772         [ +  - ]:         10 :         if (max > 0) {
    1773                 :         10 :                 ac->ac_b_ex = ex;
    1774                 :         10 :                 ext4_mb_use_best_found(ac, e4b);
    1775                 :            :         }
    1776                 :            : 
    1777                 :         10 :         ext4_unlock_group(ac->ac_sb, group);
    1778                 :         10 :         ext4_mb_unload_buddy(e4b);
    1779                 :            : 
    1780                 :         10 :         return 0;
    1781                 :            : }
    1782                 :            : 
    1783                 :            : static noinline_for_stack
    1784                 :          0 : int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
    1785                 :      24493 :                                 struct ext4_buddy *e4b)
    1786                 :            : {
    1787                 :      88754 :         ext4_group_t group = ac->ac_g_ex.fe_group;
    1788                 :            :         int max;
    1789                 :            :         int err;
    1790                 :      88754 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    1791                 :            :         struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
    1792                 :            :         struct ext4_free_extent ex;
    1793                 :            : 
    1794         [ +  + ]:      88754 :         if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
    1795                 :            :                 return 0;
    1796         [ +  + ]:      24493 :         if (grp->bb_free == 0)
    1797                 :            :                 return 0;
    1798                 :            : 
    1799                 :      24490 :         err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
    1800            [ + ]:      24489 :         if (err)
    1801                 :            :                 return err;
    1802                 :            : 
    1803         [ -  + ]:      24492 :         if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
    1804                 :          0 :                 ext4_mb_unload_buddy(e4b);
    1805                 :          0 :                 return 0;
    1806                 :            :         }
    1807                 :            : 
    1808                 :      24492 :         ext4_lock_group(ac->ac_sb, group);
    1809                 :      24489 :         max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
    1810                 :            :                              ac->ac_g_ex.fe_len, &ex);
    1811                 :            : 
    1812    [ +  + ][ + ]:     113247 :         if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
    1813                 :            :                 ext4_fsblk_t start;
    1814                 :            : 
    1815                 :     177508 :                 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
    1816                 :      88754 :                         ex.fe_start;
    1817                 :            :                 /* use do_div to get remainder (would be 64-bit modulo) */
    1818 [ -  + ][ #  # ]:      88754 :                 if (do_div(start, sbi->s_stripe) == 0) {
         [ -  + ][ -  + ]
         [ -  + ][ -  + ]
         [ -  + ][ -  + ]
         [ -  + ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
    1819                 :          0 :                         ac->ac_found++;
    1820                 :          0 :                         ac->ac_b_ex = ex;
    1821                 :          0 :                         ext4_mb_use_best_found(ac, e4b);
    1822                 :            :                 }
    1823         [ +  + ]:      24493 :         } else if (max >= ac->ac_g_ex.fe_len) {
    1824         [ -  + ]:      10604 :                 BUG_ON(ex.fe_len <= 0);
    1825         [ -  + ]:      10604 :                 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
    1826         [ -  + ]:      10604 :                 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
    1827                 :      10604 :                 ac->ac_found++;
    1828                 :      10604 :                 ac->ac_b_ex = ex;
    1829                 :      10604 :                 ext4_mb_use_best_found(ac, e4b);
    1830 [ +  + ][ -  + ]:      13889 :         } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
    1831                 :            :                 /* Sometimes, caller may want to merge even small
    1832                 :            :                  * number of blocks to an existing extent */
    1833         [ #  # ]:          0 :                 BUG_ON(ex.fe_len <= 0);
    1834         [ #  # ]:          0 :                 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
    1835         [ #  # ]:          0 :                 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
    1836                 :          0 :                 ac->ac_found++;
    1837                 :          0 :                 ac->ac_b_ex = ex;
    1838                 :          0 :                 ext4_mb_use_best_found(ac, e4b);
    1839                 :            :         }
    1840                 :      24493 :         ext4_unlock_group(ac->ac_sb, group);
    1841                 :      24493 :         ext4_mb_unload_buddy(e4b);
    1842                 :            : 
    1843                 :      24493 :         return 0;
    1844                 :            : }
    1845                 :            : 
    1846                 :            : /*
    1847                 :            :  * The routine scans buddy structures (not bitmap!) from the given order
    1848                 :            :  * to max order and tries to find a big enough chunk to satisfy the request
    1849                 :            :  */
    1850                 :            : static noinline_for_stack
    1851                 :          0 : void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
    1852                 :            :                                         struct ext4_buddy *e4b)
    1853                 :            : {
    1854                 :      10778 :         struct super_block *sb = ac->ac_sb;
    1855                 :       5389 :         struct ext4_group_info *grp = e4b->bd_info;
    1856                 :            :         void *buddy;
    1857                 :            :         int i;
    1858                 :            :         int k;
    1859                 :            :         int max;
    1860                 :            : 
    1861         [ -  + ]:       5389 :         BUG_ON(ac->ac_2order <= 0);
    1862         [ +  - ]:       7185 :         for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
    1863         [ +  + ]:       7185 :                 if (grp->bb_counters[i] == 0)
    1864                 :       1796 :                         continue;
    1865                 :            : 
    1866                 :       5389 :                 buddy = mb_find_buddy(e4b, i, &max);
    1867         [ -  + ]:       5389 :                 BUG_ON(buddy == NULL);
    1868                 :            : 
    1869                 :       5389 :                 k = mb_find_next_zero_bit(buddy, max, 0);
    1870         [ -  + ]:       5389 :                 BUG_ON(k >= max);
    1871                 :            : 
    1872                 :       5389 :                 ac->ac_found++;
    1873                 :            : 
    1874                 :       5389 :                 ac->ac_b_ex.fe_len = 1 << i;
    1875                 :       5389 :                 ac->ac_b_ex.fe_start = k << i;
    1876                 :       5389 :                 ac->ac_b_ex.fe_group = e4b->bd_group;
    1877                 :            : 
    1878                 :       5389 :                 ext4_mb_use_best_found(ac, e4b);
    1879                 :            : 
    1880         [ -  + ]:       5389 :                 BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
    1881                 :            : 
    1882         [ -  + ]:       5389 :                 if (EXT4_SB(sb)->s_mb_stats)
    1883                 :          0 :                         atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
    1884                 :            : 
    1885                 :            :                 break;
    1886                 :            :         }
    1887                 :       5389 : }
    1888                 :            : 
    1889                 :            : /*
    1890                 :            :  * The routine scans the group and measures all found extents.
    1891                 :            :  * In order to optimize scanning, caller must pass number of
    1892                 :            :  * free blocks in the group, so the routine can know upper limit.
    1893                 :            :  */
    1894                 :            : static noinline_for_stack
    1895                 :          0 : void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
    1896                 :            :                                         struct ext4_buddy *e4b)
    1897                 :            : {
    1898                 :    1480780 :         struct super_block *sb = ac->ac_sb;
    1899                 :      72761 :         void *bitmap = e4b->bd_bitmap;
    1900                 :            :         struct ext4_free_extent ex;
    1901                 :            :         int i;
    1902                 :            :         int free;
    1903                 :            : 
    1904                 :      72761 :         free = e4b->bd_info->bb_free;
    1905         [ -  + ]:      72761 :         BUG_ON(free <= 0);
    1906                 :            : 
    1907                 :      72761 :         i = e4b->bd_info->bb_first_free;
    1908                 :            : 
    1909 [ +  + ][ +  + ]:     776772 :         while (free && ac->ac_status == AC_STATUS_CONTINUE) {
    1910                 :     704010 :                 i = mb_find_next_zero_bit(bitmap,
    1911                 :     704010 :                                                 EXT4_CLUSTERS_PER_GROUP(sb), i);
    1912         [ -  + ]:     704009 :                 if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
    1913                 :            :                         /*
    1914                 :            :                          * If we have a corrupt bitmap, we won't find any
    1915                 :            :                          * free blocks even though group info says
    1916                 :            :                          * we have free blocks
    1917                 :            :                          */
    1918                 :          0 :                         ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
    1919                 :            :                                         "%d free clusters as per "
    1920                 :            :                                         "group info. But bitmap says 0",
    1921                 :            :                                         free);
    1922                 :          0 :                         break;
    1923                 :            :                 }
    1924                 :            : 
    1925                 :     704009 :                 mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
    1926         [ -  + ]:     704010 :                 BUG_ON(ex.fe_len <= 0);
    1927         [ -  + ]:     704010 :                 if (free < ex.fe_len) {
    1928                 :          0 :                         ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
    1929                 :            :                                         "%d free clusters as per "
    1930                 :            :                                         "group info. But got %d blocks",
    1931                 :            :                                         free, ex.fe_len);
    1932                 :            :                         /*
    1933                 :            :                          * The number of free blocks differs. This most
    1934                 :            :                          * likely indicates that the bitmap is corrupt, so
    1935                 :            :                          * exit without claiming the space.
    1936                 :            :                          */
    1937                 :          0 :                         break;
    1938                 :            :                 }
    1939                 :            : 
    1940                 :     704010 :                 ext4_mb_measure_extent(ac, &ex, e4b);
    1941                 :            : 
    1942                 :     704011 :                 i += ex.fe_len;
    1943                 :     704011 :                 free -= ex.fe_len;
    1944                 :            :         }
    1945                 :            : 
    1946                 :      72762 :         ext4_mb_check_limits(ac, e4b, 1);
    1947                 :      72761 : }
    1948                 :            : 
    1949                 :            : /*
    1950                 :            :  * This is a special case for storage such as RAID5:
    1951                 :            :  * we try to find stripe-aligned chunks for stripe-size-multiple requests
    1952                 :            :  */
    1953                 :            : static noinline_for_stack
    1954                 :          0 : void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
    1955                 :            :                                  struct ext4_buddy *e4b)
    1956                 :            : {
    1957                 :          0 :         struct super_block *sb = ac->ac_sb;
    1958                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    1959                 :          0 :         void *bitmap = e4b->bd_bitmap;
    1960                 :            :         struct ext4_free_extent ex;
    1961                 :            :         ext4_fsblk_t first_group_block;
    1962                 :            :         ext4_fsblk_t a;
    1963                 :            :         ext4_grpblk_t i;
    1964                 :            :         int max;
    1965                 :            : 
    1966         [ #  # ]:          0 :         BUG_ON(sbi->s_stripe == 0);
    1967                 :            : 
    1968                 :            :         /* find first stripe-aligned block in group */
    1969                 :          0 :         first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
    1970                 :            : 
    1971                 :          0 :         a = first_group_block + sbi->s_stripe - 1;
    1972 [ #  # ][ #  # ]:          0 :         do_div(a, sbi->s_stripe);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
                 [ #  # ]
    1973                 :          0 :         i = (a * sbi->s_stripe) - first_group_block;
    1974                 :            : 
    1975         [ #  # ]:          0 :         while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
    1976         [ #  # ]:          0 :                 if (!mb_test_bit(i, bitmap)) {
    1977                 :          0 :                         max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
    1978         [ #  # ]:          0 :                         if (max >= sbi->s_stripe) {
    1979                 :          0 :                                 ac->ac_found++;
    1980                 :          0 :                                 ac->ac_b_ex = ex;
    1981                 :          0 :                                 ext4_mb_use_best_found(ac, e4b);
    1982                 :          0 :                                 break;
    1983                 :            :                         }
    1984                 :            :                 }
    1985                 :          0 :                 i += sbi->s_stripe;
    1986                 :            :         }
    1987                 :          0 : }
    1988                 :            : 
    1989                 :            : /* This is now called BEFORE we load the buddy bitmap. */
    1990                 :          0 : static int ext4_mb_good_group(struct ext4_allocation_context *ac,
    1991                 :            :                                 ext4_group_t group, int cr)
    1992                 :            : {
    1993                 :            :         unsigned free, fragments;
    1994                 :     181194 :         int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
    1995                 :            :         struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
    1996                 :            : 
    1997         [ -  + ]:     181194 :         BUG_ON(cr < 0 || cr >= 4);
    1998                 :            : 
    1999                 :     181194 :         free = grp->bb_free;
    2000         [ +  + ]:     181194 :         if (free == 0)
    2001                 :            :                 return 0;
    2002 [ +  + ][ +  + ]:     157788 :         if (cr <= 2 && free < ac->ac_g_ex.fe_len)
    2003                 :            :                 return 0;
    2004                 :            : 
    2005         [ +  - ]:     157189 :         if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
    2006                 :            :                 return 0;
    2007                 :            : 
    2008                 :            :         /* We only do this if the grp has never been initialized */
    2009         [ +  + ]:     157189 :         if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
    2010                 :         39 :                 int ret = ext4_mb_init_group(ac->ac_sb, group);
    2011            [ + ]:         39 :                 if (ret)
    2012                 :            :                         return 0;
    2013                 :            :         }
    2014                 :            : 
    2015                 :     157190 :         fragments = grp->bb_fragments;
    2016         [ +  - ]:     157190 :         if (fragments == 0)
    2017                 :            :                 return 0;
    2018                 :            : 
    2019   [ +  +  -  -  :     157190 :         switch (cr) {
                      + ]
    2020                 :            :         case 0:
    2021         [ -  + ]:      10918 :                 BUG_ON(ac->ac_2order == 0);
    2022                 :            : 
    2023                 :            :                 /* Avoid using the first bg of a flexgroup for data files */
    2024 [ +  - ][ +  - ]:      10918 :                 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
    2025         [ +  + ]:      10918 :                     (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
    2026                 :      10918 :                     ((group % flex_size) == 0))
    2027                 :            :                         return 0;
    2028                 :            : 
    2029 [ +  - ][ +  + ]:      10900 :                 if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
    2030                 :      10900 :                     (free / fragments) >= ac->ac_g_ex.fe_len)
    2031                 :            :                         return 1;
    2032                 :            : 
    2033         [ +  + ]:        266 :                 if (grp->bb_largest_free_order < ac->ac_2order)
    2034                 :            :                         return 0;
    2035                 :            : 
    2036                 :        144 :                 return 1;
    2037                 :            :         case 1:
    2038         [ +  + ]:     146271 :                 if ((free / fragments) >= ac->ac_g_ex.fe_len)
    2039                 :            :                         return 1;
    2040                 :            :                 break;
    2041                 :            :         case 2:
    2042         [ #  # ]:          0 :                 if (free >= ac->ac_g_ex.fe_len)
    2043                 :            :                         return 1;
    2044                 :            :                 break;
    2045                 :            :         case 3:
    2046                 :            :                 return 1;
    2047                 :            :         default:
    2048                 :          0 :                 BUG();
    2049                 :            :         }
    2050                 :            : 
    2051                 :        751 :         return 0;
    2052                 :            : }
    2053                 :            : 
    2054                 :            : static noinline_for_stack int
    2055                 :          0 : ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
    2056                 :            : {
    2057                 :            :         ext4_group_t ngroups, group, i;
    2058                 :            :         int cr;
    2059                 :            :         int err = 0;
    2060                 :            :         struct ext4_sb_info *sbi;
    2061                 :      88753 :         struct super_block *sb;
    2062                 :            :         struct ext4_buddy e4b;
    2063                 :            : 
    2064                 :      88753 :         sb = ac->ac_sb;
    2065                 :            :         sbi = EXT4_SB(sb);
    2066                 :            :         ngroups = ext4_get_groups_count(sb);
    2067                 :            :         /* non-extent files are limited to low blocks/groups */
    2068         [ -  + ]:      88754 :         if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
    2069                 :          0 :                 ngroups = sbi->s_blockfile_groups;
    2070                 :            : 
    2071         [ -  + ]:      88754 :         BUG_ON(ac->ac_status == AC_STATUS_FOUND);
    2072                 :            : 
    2073                 :            :         /* first, try the goal */
    2074                 :      88754 :         err = ext4_mb_find_by_goal(ac, &e4b);
    2075 [ +  - ][ +  + ]:      88754 :         if (err || ac->ac_status == AC_STATUS_FOUND)
    2076                 :            :                 goto out;
    2077                 :            : 
    2078         [ +  - ]:      78150 :         if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
    2079                 :            :                 goto out;
    2080                 :            : 
    2081                 :            :         /*
    2082                 :            :          * ac->ac_2order is set only if fe_len is a power of 2;
    2083                 :            :          * if ac_2order is set we also set criteria to 0 so that we
    2084                 :            :          * try exact allocation using buddy.
    2085                 :            :          */
    2086                 :      67547 :         i = fls(ac->ac_g_ex.fe_len);
    2087                 :          0 :         ac->ac_2order = 0;
    2088                 :            :         /*
    2089                 :            :          * We search using buddy data only if the order of the request
    2090                 :            :          * is greater than or equal to sbi->s_mb_order2_reqs.
    2091                 :            :          * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
    2092                 :            :          */
    2093            [ + ]:      78150 :         if (i >= sbi->s_mb_order2_reqs) {
    2094                 :            :                 /*
    2095                 :            :                  * This should tell if fe_len is exactly power of 2
    2096                 :            :                  */
    2097         [ +  + ]:      20383 :                 if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
    2098                 :       5389 :                         ac->ac_2order = i - 1;
    2099                 :            :         }
    2100                 :            : 
    2101                 :            :         /* if stream allocation is enabled, use global goal */
    2102            [ + ]:          0 :         if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
    2103                 :            :                 /* TBD: may be hot point */
    2104                 :            :                 spin_lock(&sbi->s_md_lock);
    2105                 :      14643 :                 ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
    2106                 :      14643 :                 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
    2107                 :            :                 spin_unlock(&sbi->s_md_lock);
    2108                 :            :         }
    2109                 :            : 
    2110                 :            :         /* Let's just scan groups to find more or less suitable blocks */
    2111                 :      78150 :         cr = ac->ac_2order ? 0 : 1;
    2112                 :            :         /*
    2113                 :            :          * cr == 0 try to get exact allocation,
    2114                 :            :          * cr == 3  try to get anything
    2115                 :            :          */
    2116                 :            : repeat:
    2117 [ +  - ][ +  + ]:     156300 :         for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
    2118                 :      78150 :                 ac->ac_criteria = cr;
    2119                 :            :                 /*
    2120                 :            :                  * searching for the right group start
    2121                 :            :                  * from the goal value specified
    2122                 :            :                  */
    2123                 :      78150 :                 group = ac->ac_g_ex.fe_group;
    2124                 :            : 
    2125         [ +  - ]:     103045 :                 for (i = 0; i < ngroups; group++, i++) {
    2126                 :     103045 :                         cond_resched();
    2127                 :            :                         /*
    2128                 :            :                          * Artificially restricted ngroups for non-extent
    2129                 :            :                          * files makes group >= ngroups possible on first loop.
    2130                 :            :                          */
    2131         [ -  + ]:     103044 :                         if (group >= ngroups)
    2132                 :            :                                 group = 0;
    2133                 :            : 
    2134                 :            :                         /* This now checks without needing the buddy page */
    2135         [ +  + ]:     103044 :                         if (!ext4_mb_good_group(ac, group, cr))
    2136                 :      24895 :                                 continue;
    2137                 :            : 
    2138                 :      78149 :                         err = ext4_mb_load_buddy(sb, group, &e4b);
    2139         [ +  + ]:      78150 :                         if (err)
    2140                 :            :                                 goto out;
    2141                 :            : 
    2142                 :            :                         ext4_lock_group(sb, group);
    2143                 :            : 
    2144                 :            :                         /*
    2145                 :            :                          * We need to check again after locking the
    2146                 :            :                          * block group
    2147                 :            :                          */
    2148         [ -  + ]:      78150 :                         if (!ext4_mb_good_group(ac, group, cr)) {
    2149                 :            :                                 ext4_unlock_group(sb, group);
    2150                 :          0 :                                 ext4_mb_unload_buddy(&e4b);
    2151                 :          0 :                                 continue;
    2152                 :            :                         }
    2153                 :            : 
    2154                 :      78150 :                         ac->ac_groups_scanned++;
    2155 [ +  + ][ +  - ]:      78150 :                         if (cr == 0 && ac->ac_2order < sb->s_blocksize_bits+2)
    2156                 :       5389 :                                 ext4_mb_simple_scan_group(ac, &e4b);
    2157 [ +  - ][ -  + ]:      72761 :                         else if (cr == 1 && sbi->s_stripe &&
                 [ #  # ]
    2158                 :          0 :                                         !(ac->ac_g_ex.fe_len % sbi->s_stripe))
    2159                 :          0 :                                 ext4_mb_scan_aligned(ac, &e4b);
    2160                 :            :                         else
    2161                 :      72761 :                                 ext4_mb_complex_scan_group(ac, &e4b);
    2162                 :            : 
    2163                 :            :                         ext4_unlock_group(sb, group);
    2164                 :      78150 :                         ext4_mb_unload_buddy(&e4b);
    2165                 :            : 
    2166         [ -  + ]:      78150 :                         if (ac->ac_status != AC_STATUS_CONTINUE)
    2167                 :            :                                 break;
    2168                 :            :                 }
    2169                 :            :         }
    2170                 :            : 
    2171 [ +  - ][ +  + ]:      78150 :         if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
                 [ +  - ]
    2172                 :         10 :             !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
    2173                 :            :                 /*
    2174                 :            :                  * We've been searching too long. Let's try to allocate
    2175                 :            :                  * the best chunk we've found so far
    2176                 :            :                  */
    2177                 :            : 
    2178                 :         10 :                 ext4_mb_try_best_found(ac, &e4b);
    2179         [ -  + ]:         10 :                 if (ac->ac_status != AC_STATUS_FOUND) {
    2180                 :            :                         /*
    2181                 :            :                          * Someone luckier has already allocated it.
    2182                 :            :                          * The only thing we can do is just take first
    2183                 :            :                          * found block(s)
    2184                 :            :                         printk(KERN_DEBUG "EXT4-fs: someone won our chunk\n");
    2185                 :            :                          */
    2186                 :          0 :                         ac->ac_b_ex.fe_group = 0;
    2187                 :          0 :                         ac->ac_b_ex.fe_start = 0;
    2188                 :          0 :                         ac->ac_b_ex.fe_len = 0;
    2189                 :          0 :                         ac->ac_status = AC_STATUS_CONTINUE;
    2190                 :          0 :                         ac->ac_flags |= EXT4_MB_HINT_FIRST;
    2191                 :            :                         cr = 3;
    2192                 :          0 :                         atomic_inc(&sbi->s_mb_lost_chunks);
    2193                 :            :                         goto repeat;
    2194                 :            :                 }
    2195                 :            :         }
    2196                 :            : out:
    2197                 :      88755 :         return err;
    2198                 :            : }
    2199                 :            : 
    2200                 :          0 : static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
    2201                 :            : {
    2202                 :         15 :         struct super_block *sb = seq->private;
    2203                 :            :         ext4_group_t group;
    2204                 :            : 
    2205   [ +  -  +  + ]:         30 :         if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
    2206                 :            :                 return NULL;
    2207                 :         13 :         group = *pos + 1;
    2208                 :         13 :         return (void *) ((unsigned long) group);
    2209                 :            : }
    2210                 :            : 
    2211                 :          0 : static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
    2212                 :            : {
    2213                 :        103 :         struct super_block *sb = seq->private;
    2214                 :            :         ext4_group_t group;
    2215                 :            : 
    2216                 :        103 :         ++*pos;
    2217   [ +  -  +  - ]:        206 :         if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
    2218                 :            :                 return NULL;
    2219                 :        103 :         group = *pos + 1;
    2220                 :        103 :         return (void *) ((unsigned long) group);
    2221                 :            : }
    2222                 :            : 
    2223                 :          0 : static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
    2224                 :            : {
    2225                 :        116 :         struct super_block *sb = seq->private;
    2226                 :        116 :         ext4_group_t group = (ext4_group_t) ((unsigned long) v);
    2227                 :            :         int i;
    2228                 :            :         int err, buddy_loaded = 0;
    2229                 :            :         struct ext4_buddy e4b;
    2230                 :            :         struct ext4_group_info *grinfo;
    2231                 :            :         struct sg {
    2232                 :            :                 struct ext4_group_info info;
    2233                 :            :                 ext4_grpblk_t counters[16];
    2234                 :            :         } sg;
    2235                 :            : 
    2236                 :        116 :         group--;
    2237         [ +  + ]:        116 :         if (group == 0)
    2238                 :          1 :                 seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
    2239                 :            :                                 "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
    2240                 :            :                                   "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
    2241                 :            :                            "group", "free", "frags", "first",
    2242                 :            :                            "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
    2243                 :            :                            "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
    2244                 :            : 
    2245                 :        116 :         i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
    2246                 :            :                 sizeof(struct ext4_group_info);
    2247                 :            :         grinfo = ext4_get_group_info(sb, group);
    2248                 :            :         /* Load the group info in memory only if not already loaded. */
    2249         [ +  + ]:        116 :         if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
    2250                 :         55 :                 err = ext4_mb_load_buddy(sb, group, &e4b);
    2251         [ -  + ]:         55 :                 if (err) {
    2252                 :          0 :                         seq_printf(seq, "#%-5u: I/O error\n", group);
    2253                 :          0 :                         return 0;
    2254                 :            :                 }
    2255                 :            :                 buddy_loaded = 1;
    2256                 :            :         }
    2257                 :            : 
    2258                 :        116 :         memcpy(&sg, ext4_get_group_info(sb, group), i);
    2259                 :            : 
    2260         [ +  + ]:        116 :         if (buddy_loaded)
    2261                 :         55 :                 ext4_mb_unload_buddy(&e4b);
    2262                 :            : 
    2263                 :        116 :         seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
    2264                 :            :                         sg.info.bb_fragments, sg.info.bb_first_free);
    2265         [ +  + ]:       1740 :         for (i = 0; i <= 13; i++)
    2266         [ +  - ]:       1624 :                 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
    2267                 :            :                                 sg.info.bb_counters[i] : 0);
    2268                 :        116 :         seq_printf(seq, " ]\n");
    2269                 :            : 
    2270                 :        116 :         return 0;
    2271                 :            : }
    2272                 :            : 
    2273                 :          0 : static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
    2274                 :            : {
    2275                 :         15 : }
    2276                 :            : 
    2277                 :            : static const struct seq_operations ext4_mb_seq_groups_ops = {
    2278                 :            :         .start  = ext4_mb_seq_groups_start,
    2279                 :            :         .next   = ext4_mb_seq_groups_next,
    2280                 :            :         .stop   = ext4_mb_seq_groups_stop,
    2281                 :            :         .show   = ext4_mb_seq_groups_show,
    2282                 :            : };
    2283                 :            : 
    2284                 :          0 : static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
    2285                 :            : {
    2286                 :          1 :         struct super_block *sb = PDE_DATA(inode);
    2287                 :            :         int rc;
    2288                 :            : 
    2289                 :          1 :         rc = seq_open(file, &ext4_mb_seq_groups_ops);
    2290         [ +  - ]:          1 :         if (rc == 0) {
    2291                 :          1 :                 struct seq_file *m = file->private_data;
    2292                 :          1 :                 m->private = sb;
    2293                 :            :         }
    2294                 :          0 :         return rc;
    2295                 :            : 
    2296                 :            : }
    2297                 :            : 
    2298                 :            : static const struct file_operations ext4_mb_seq_groups_fops = {
    2299                 :            :         .owner          = THIS_MODULE,
    2300                 :            :         .open           = ext4_mb_seq_groups_open,
    2301                 :            :         .read           = seq_read,
    2302                 :            :         .llseek         = seq_lseek,
    2303                 :            :         .release        = seq_release,
    2304                 :            : };
    2305                 :            : 
    2306                 :            : static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
    2307                 :            : {
    2308                 :          0 :         int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
    2309                 :          0 :         struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
    2310                 :            : 
    2311 [ #  # ][ #  # ]:          0 :         BUG_ON(!cachep);
                 [ #  # ]
    2312                 :            :         return cachep;
    2313                 :            : }
    2314                 :            : 
    2315                 :            : /*
    2316                 :            :  * Allocate the top-level s_group_info array for the specified number
    2317                 :            :  * of groups
    2318                 :            :  */
    2319                 :          0 : int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
    2320                 :            : {
    2321                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    2322                 :            :         unsigned size;
    2323                 :            :         struct ext4_group_info ***new_groupinfo;
    2324                 :            : 
    2325                 :          0 :         size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
    2326                 :          0 :                 EXT4_DESC_PER_BLOCK_BITS(sb);
    2327         [ #  # ]:          0 :         if (size <= sbi->s_group_info_size)
    2328                 :            :                 return 0;
    2329                 :            : 
    2330 [ #  # ][ #  # ]:          0 :         size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
    2331                 :          0 :         new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL);
    2332         [ #  # ]:          0 :         if (!new_groupinfo) {
    2333                 :          0 :                 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
    2334                 :          0 :                 return -ENOMEM;
    2335                 :            :         }
    2336         [ #  # ]:          0 :         if (sbi->s_group_info) {
    2337                 :          0 :                 memcpy(new_groupinfo, sbi->s_group_info,
    2338                 :          0 :                        sbi->s_group_info_size * sizeof(*sbi->s_group_info));
    2339                 :          0 :                 ext4_kvfree(sbi->s_group_info);
    2340                 :            :         }
    2341                 :          0 :         sbi->s_group_info = new_groupinfo;
    2342                 :          0 :         sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
    2343                 :            :         ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", 
    2344                 :            :                    sbi->s_group_info_size);
    2345                 :          0 :         return 0;
    2346                 :            : }
    2347                 :            : 
    2348                 :            : /* Create and initialize ext4_group_info data for the given group. */
    2349                 :          0 : int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
    2350                 :            :                           struct ext4_group_desc *desc)
    2351                 :            : {
    2352                 :            :         int i;
    2353                 :            :         int metalen = 0;
    2354                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    2355                 :            :         struct ext4_group_info **meta_group_info;
    2356                 :          0 :         struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
    2357                 :            : 
    2358                 :            :         /*
    2359                 :            :          * First check if this group is the first of a reserved block.
    2360                 :            :          * If it's true, we have to allocate a new table of pointers
    2361                 :            :          * to ext4_group_info structures
    2362                 :            :          */
    2363         [ #  # ]:          0 :         if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
    2364                 :          0 :                 metalen = sizeof(*meta_group_info) <<
    2365                 :          0 :                         EXT4_DESC_PER_BLOCK_BITS(sb);
    2366                 :            :                 meta_group_info = kmalloc(metalen, GFP_KERNEL);
    2367         [ #  # ]:          0 :                 if (meta_group_info == NULL) {
    2368                 :          0 :                         ext4_msg(sb, KERN_ERR, "can't allocate mem "
    2369                 :            :                                  "for a buddy group");
    2370                 :          0 :                         goto exit_meta_group_info;
    2371                 :            :                 }
    2372                 :          0 :                 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
    2373                 :            :                         meta_group_info;
    2374                 :            :         }
    2375                 :            : 
    2376                 :          0 :         meta_group_info =
    2377                 :          0 :                 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
    2378                 :          0 :         i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
    2379                 :            : 
    2380                 :          0 :         meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
    2381         [ #  # ]:          0 :         if (meta_group_info[i] == NULL) {
    2382                 :          0 :                 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
    2383                 :            :                 goto exit_group_info;
    2384                 :            :         }
    2385                 :          0 :         set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
    2386                 :            :                 &(meta_group_info[i]->bb_state));
    2387                 :            : 
    2388                 :            :         /*
    2389                 :            :          * initialize bb_free to be able to skip
    2390                 :            :          * empty groups without initialization
    2391                 :            :          */
    2392         [ #  # ]:          0 :         if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
    2393                 :          0 :                 meta_group_info[i]->bb_free =
    2394                 :          0 :                         ext4_free_clusters_after_init(sb, group, desc);
    2395                 :            :         } else {
    2396                 :          0 :                 meta_group_info[i]->bb_free =
    2397                 :          0 :                         ext4_free_group_clusters(sb, desc);
    2398                 :            :         }
    2399                 :            : 
    2400                 :          0 :         INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
    2401                 :          0 :         init_rwsem(&meta_group_info[i]->alloc_sem);
    2402                 :          0 :         meta_group_info[i]->bb_free_root = RB_ROOT;
    2403                 :          0 :         meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */
    2404                 :            : 
    2405                 :            : #ifdef DOUBLE_CHECK
    2406                 :            :         {
    2407                 :            :                 struct buffer_head *bh;
    2408                 :            :                 meta_group_info[i]->bb_bitmap =
    2409                 :            :                         kmalloc(sb->s_blocksize, GFP_KERNEL);
    2410                 :            :                 BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
    2411                 :            :                 bh = ext4_read_block_bitmap(sb, group);
    2412                 :            :                 BUG_ON(bh == NULL);
    2413                 :            :                 memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
    2414                 :            :                         sb->s_blocksize);
    2415                 :            :                 put_bh(bh);
    2416                 :            :         }
    2417                 :            : #endif
    2418                 :            : 
    2419                 :          0 :         return 0;
    2420                 :            : 
    2421                 :            : exit_group_info:
    2422                 :            :         /* If a meta_group_info table has been allocated, release it now */
    2423         [ #  # ]:          0 :         if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
    2424                 :          0 :                 kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
    2425                 :          0 :                 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
    2426                 :            :         }
    2427                 :            : exit_meta_group_info:
    2428                 :            :         return -ENOMEM;
    2429                 :            : } /* ext4_mb_add_groupinfo */
    2430                 :            : 
    2431                 :          0 : static int ext4_mb_init_backend(struct super_block *sb)
    2432                 :            : {
    2433                 :            :         ext4_group_t ngroups = ext4_get_groups_count(sb);
    2434                 :            :         ext4_group_t i;
    2435                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    2436                 :            :         int err;
    2437                 :            :         struct ext4_group_desc *desc;
    2438                 :            :         struct kmem_cache *cachep;
    2439                 :            : 
    2440                 :          0 :         err = ext4_mb_alloc_groupinfo(sb, ngroups);
    2441         [ #  # ]:          0 :         if (err)
    2442                 :            :                 return err;
    2443                 :            : 
    2444                 :          0 :         sbi->s_buddy_cache = new_inode(sb);
    2445         [ #  # ]:          0 :         if (sbi->s_buddy_cache == NULL) {
    2446                 :          0 :                 ext4_msg(sb, KERN_ERR, "can't get new inode");
    2447                 :          0 :                 goto err_freesgi;
    2448                 :            :         }
    2449                 :            :         /* To avoid potentially colliding with a valid on-disk inode number,
    2450                 :            :          * use EXT4_BAD_INO for the buddy cache inode number.  This inode is
    2451                 :            :          * not in the inode hash, so it should never be found by iget(), but
    2452                 :            :          * this will avoid confusion if it ever shows up during debugging. */
    2453                 :          0 :         sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
    2454                 :          0 :         EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
    2455         [ #  # ]:          0 :         for (i = 0; i < ngroups; i++) {
    2456                 :          0 :                 desc = ext4_get_group_desc(sb, i, NULL);
    2457         [ #  # ]:          0 :                 if (desc == NULL) {
    2458                 :          0 :                         ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
    2459                 :          0 :                         goto err_freebuddy;
    2460                 :            :                 }
    2461         [ #  # ]:          0 :                 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
    2462                 :            :                         goto err_freebuddy;
    2463                 :            :         }
    2464                 :            : 
    2465                 :            :         return 0;
    2466                 :            : 
    2467                 :            : err_freebuddy:
    2468                 :          0 :         cachep = get_groupinfo_cache(sb->s_blocksize_bits);
    2469         [ #  # ]:          0 :         while (i-- > 0)
    2470                 :          0 :                 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
    2471                 :          0 :         i = sbi->s_group_info_size;
    2472         [ #  # ]:          0 :         while (i-- > 0)
    2473                 :          0 :                 kfree(sbi->s_group_info[i]);
    2474                 :          0 :         iput(sbi->s_buddy_cache);
    2475                 :            : err_freesgi:
    2476                 :          0 :         ext4_kvfree(sbi->s_group_info);
    2477                 :          0 :         return -ENOMEM;
    2478                 :            : }
    2479                 :            : 
    2480                 :          0 : static void ext4_groupinfo_destroy_slabs(void)
    2481                 :            : {
    2482                 :            :         int i;
    2483                 :            : 
    2484         [ #  # ]:          0 :         for (i = 0; i < NR_GRPINFO_CACHES; i++) {
    2485         [ #  # ]:          0 :                 if (ext4_groupinfo_caches[i])
    2486                 :          0 :                         kmem_cache_destroy(ext4_groupinfo_caches[i]);
    2487                 :          0 :                 ext4_groupinfo_caches[i] = NULL;
    2488                 :            :         }
    2489                 :          0 : }
    2490                 :            : 
    2491                 :          0 : static int ext4_groupinfo_create_slab(size_t size)
    2492                 :            : {
    2493                 :            :         static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
    2494                 :            :         int slab_size;
    2495 [ #  # ][ #  # ]:          0 :         int blocksize_bits = order_base_2(size);
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
    2496                 :          0 :         int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
    2497                 :            :         struct kmem_cache *cachep;
    2498                 :            : 
    2499         [ #  # ]:          0 :         if (cache_index >= NR_GRPINFO_CACHES)
    2500                 :            :                 return -EINVAL;
    2501                 :            : 
    2502         [ #  # ]:          0 :         if (unlikely(cache_index < 0))
    2503                 :            :                 cache_index = 0;
    2504                 :            : 
    2505                 :          0 :         mutex_lock(&ext4_grpinfo_slab_create_mutex);
    2506         [ #  # ]:          0 :         if (ext4_groupinfo_caches[cache_index]) {
    2507                 :          0 :                 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
    2508                 :          0 :                 return 0;       /* Already created */
    2509                 :            :         }
    2510                 :            : 
    2511                 :          0 :         slab_size = offsetof(struct ext4_group_info,
    2512                 :            :                                 bb_counters[blocksize_bits + 2]);
    2513                 :            : 
    2514                 :          0 :         cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
    2515                 :            :                                         slab_size, 0, SLAB_RECLAIM_ACCOUNT,
    2516                 :            :                                         NULL);
    2517                 :            : 
    2518                 :          0 :         ext4_groupinfo_caches[cache_index] = cachep;
    2519                 :            : 
    2520                 :          0 :         mutex_unlock(&ext4_grpinfo_slab_create_mutex);
    2521         [ #  # ]:          0 :         if (!cachep) {
    2522                 :          0 :                 printk(KERN_EMERG
    2523                 :            :                        "EXT4-fs: no memory for groupinfo slab cache\n");
    2524                 :          0 :                 return -ENOMEM;
    2525                 :            :         }
    2526                 :            : 
    2527                 :            :         return 0;
    2528                 :            : }
    2529                 :            : 
    2530                 :          0 : int ext4_mb_init(struct super_block *sb)
    2531                 :            : {
    2532                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    2533                 :            :         unsigned i, j;
    2534                 :            :         unsigned offset;
    2535                 :            :         unsigned max;
    2536                 :            :         int ret;
    2537                 :            : 
    2538                 :          0 :         i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
    2539                 :            : 
    2540                 :          0 :         sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
    2541         [ #  # ]:          0 :         if (sbi->s_mb_offsets == NULL) {
    2542                 :            :                 ret = -ENOMEM;
    2543                 :            :                 goto out;
    2544                 :            :         }
    2545                 :            : 
    2546                 :          0 :         i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
    2547                 :          0 :         sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
    2548         [ #  # ]:          0 :         if (sbi->s_mb_maxs == NULL) {
    2549                 :            :                 ret = -ENOMEM;
    2550                 :            :                 goto out;
    2551                 :            :         }
    2552                 :            : 
    2553                 :          0 :         ret = ext4_groupinfo_create_slab(sb->s_blocksize);
    2554         [ #  # ]:          0 :         if (ret < 0)
    2555                 :            :                 goto out;
    2556                 :            : 
    2557                 :            :         /* order 0 is regular bitmap */
    2558                 :          0 :         sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
    2559                 :          0 :         sbi->s_mb_offsets[0] = 0;
    2560                 :            : 
    2561                 :            :         i = 1;
    2562                 :            :         offset = 0;
    2563                 :          0 :         max = sb->s_blocksize << 2;
    2564                 :            :         do {
    2565                 :          0 :                 sbi->s_mb_offsets[i] = offset;
    2566                 :          0 :                 sbi->s_mb_maxs[i] = max;
    2567                 :          0 :                 offset += 1 << (sb->s_blocksize_bits - i);
    2568                 :          0 :                 max = max >> 1;
    2569                 :          0 :                 i++;
    2570         [ #  # ]:          0 :         } while (i <= sb->s_blocksize_bits + 1);
    2571                 :            : 
    2572                 :          0 :         spin_lock_init(&sbi->s_md_lock);
    2573                 :          0 :         spin_lock_init(&sbi->s_bal_lock);
    2574                 :            : 
    2575                 :          0 :         sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
    2576                 :          0 :         sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
    2577                 :          0 :         sbi->s_mb_stats = MB_DEFAULT_STATS;
    2578                 :          0 :         sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
    2579                 :          0 :         sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
    2580                 :            :         /*
    2581                 :            :          * The default group preallocation is 512, which for 4k block
    2582                 :            :          * sizes translates to 2 megabytes.  However for bigalloc file
    2583                 :            :          * systems, this is probably too big (i.e., if the cluster size
    2584                 :            :          * is 1 megabyte, then group preallocation size becomes half a
    2585                 :            :          * gigabyte!).  As a default, we will keep a two megabyte
    2586                 :            :          * group prealloc size for cluster sizes up to 64k, and after
    2587                 :            :          * that, we will force a minimum group preallocation size of
    2588                 :            :          * 32 clusters.  This translates to 8 megs when the cluster
    2589                 :            :          * size is 256k, and 32 megs when the cluster size is 1 meg,
    2590                 :            :          * which seems reasonable as a default.
    2591                 :            :          */
    2592                 :          0 :         sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
    2593                 :            :                                        sbi->s_cluster_bits, 32);
    2594                 :            :         /*
    2595                 :            :          * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
    2596                 :            :          * to the lowest multiple of s_stripe which is no smaller than
    2597                 :            :          * the s_mb_group_prealloc as determined above. We want
    2598                 :            :          * the preallocation size to be an exact multiple of the
    2599                 :            :          * RAID stripe size so that preallocations don't fragment
    2600                 :            :          * the stripes.
    2601                 :            :          */
    2602         [ #  # ]:          0 :         if (sbi->s_stripe > 1) {
    2603                 :          0 :                 sbi->s_mb_group_prealloc = roundup(
    2604                 :            :                         sbi->s_mb_group_prealloc, sbi->s_stripe);
    2605                 :            :         }
    2606                 :            : 
    2607                 :          0 :         sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
    2608         [ #  # ]:          0 :         if (sbi->s_locality_groups == NULL) {
    2609                 :            :                 ret = -ENOMEM;
    2610                 :            :                 goto out_free_groupinfo_slab;
    2611                 :            :         }
    2612         [ #  # ]:          0 :         for_each_possible_cpu(i) {
    2613                 :            :                 struct ext4_locality_group *lg;
    2614                 :          0 :                 lg = per_cpu_ptr(sbi->s_locality_groups, i);
    2615                 :          0 :                 mutex_init(&lg->lg_mutex);
    2616         [ #  # ]:          0 :                 for (j = 0; j < PREALLOC_TB_SIZE; j++)
    2617                 :          0 :                         INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
    2618                 :          0 :                 spin_lock_init(&lg->lg_prealloc_lock);
    2619                 :            :         }
    2620                 :            : 
    2621                 :            :         /* init file for buddy data */
    2622                 :          0 :         ret = ext4_mb_init_backend(sb);
    2623         [ #  # ]:          0 :         if (ret != 0)
    2624                 :            :                 goto out_free_locality_groups;
    2625                 :            : 
    2626         [ #  # ]:          0 :         if (sbi->s_proc)
    2627                 :          0 :                 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
    2628                 :            :                                  &ext4_mb_seq_groups_fops, sb);
    2629                 :            : 
    2630                 :            :         return 0;
    2631                 :            : 
    2632                 :            : out_free_locality_groups:
    2633                 :          0 :         free_percpu(sbi->s_locality_groups);
    2634                 :          0 :         sbi->s_locality_groups = NULL;
    2635                 :            : out_free_groupinfo_slab:
    2636                 :          0 :         ext4_groupinfo_destroy_slabs();
    2637                 :            : out:
    2638                 :          0 :         kfree(sbi->s_mb_offsets);
    2639                 :          0 :         sbi->s_mb_offsets = NULL;
    2640                 :          0 :         kfree(sbi->s_mb_maxs);
    2641                 :          0 :         sbi->s_mb_maxs = NULL;
    2642                 :          0 :         return ret;
    2643                 :            : }
    2644                 :            : 
    2645                 :            : /* needs to be called with the ext4 group lock held */
    2646                 :          0 : static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
    2647                 :            : {
    2648                 :            :         struct ext4_prealloc_space *pa;
    2649                 :            :         struct list_head *cur, *tmp;
    2650                 :            :         int count = 0;
    2651                 :            : 
    2652         [ #  # ]:          0 :         list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
    2653                 :          0 :                 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
    2654                 :            :                 list_del(&pa->pa_group_list);
    2655                 :            :                 count++;
    2656                 :          0 :                 kmem_cache_free(ext4_pspace_cachep, pa);
    2657                 :            :         }
    2658                 :            :         if (count)
    2659                 :            :                 mb_debug(1, "mballoc: %u PAs left\n", count);
    2660                 :            : 
    2661                 :          0 : }
    2662                 :            : 
    2663                 :          0 : int ext4_mb_release(struct super_block *sb)
    2664                 :            : {
    2665                 :            :         ext4_group_t ngroups = ext4_get_groups_count(sb);
    2666                 :            :         ext4_group_t i;
    2667                 :            :         int num_meta_group_infos;
    2668                 :            :         struct ext4_group_info *grinfo;
    2669                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    2670                 :          0 :         struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
    2671                 :            : 
    2672         [ #  # ]:          0 :         if (sbi->s_proc)
    2673                 :          0 :                 remove_proc_entry("mb_groups", sbi->s_proc);
    2674                 :            : 
    2675         [ #  # ]:          0 :         if (sbi->s_group_info) {
    2676         [ #  # ]:          0 :                 for (i = 0; i < ngroups; i++) {
    2677                 :            :                         grinfo = ext4_get_group_info(sb, i);
    2678                 :            : #ifdef DOUBLE_CHECK
    2679                 :            :                         kfree(grinfo->bb_bitmap);
    2680                 :            : #endif
    2681                 :            :                         ext4_lock_group(sb, i);
    2682                 :          0 :                         ext4_mb_cleanup_pa(grinfo);
    2683                 :            :                         ext4_unlock_group(sb, i);
    2684                 :          0 :                         kmem_cache_free(cachep, grinfo);
    2685                 :            :                 }
    2686                 :          0 :                 num_meta_group_infos = (ngroups +
    2687                 :          0 :                                 EXT4_DESC_PER_BLOCK(sb) - 1) >>
    2688                 :          0 :                         EXT4_DESC_PER_BLOCK_BITS(sb);
    2689         [ #  # ]:          0 :                 for (i = 0; i < num_meta_group_infos; i++)
    2690                 :          0 :                         kfree(sbi->s_group_info[i]);
    2691                 :          0 :                 ext4_kvfree(sbi->s_group_info);
    2692                 :            :         }
    2693                 :          0 :         kfree(sbi->s_mb_offsets);
    2694                 :          0 :         kfree(sbi->s_mb_maxs);
    2695         [ #  # ]:          0 :         if (sbi->s_buddy_cache)
    2696                 :          0 :                 iput(sbi->s_buddy_cache);
    2697         [ #  # ]:          0 :         if (sbi->s_mb_stats) {
    2698                 :          0 :                 ext4_msg(sb, KERN_INFO,
    2699                 :            :                        "mballoc: %u blocks %u reqs (%u success)",
    2700                 :            :                                 atomic_read(&sbi->s_bal_allocated),
    2701                 :            :                                 atomic_read(&sbi->s_bal_reqs),
    2702                 :            :                                 atomic_read(&sbi->s_bal_success));
    2703                 :          0 :                 ext4_msg(sb, KERN_INFO,
    2704                 :            :                       "mballoc: %u extents scanned, %u goal hits, "
    2705                 :            :                                 "%u 2^N hits, %u breaks, %u lost",
    2706                 :            :                                 atomic_read(&sbi->s_bal_ex_scanned),
    2707                 :            :                                 atomic_read(&sbi->s_bal_goals),
    2708                 :            :                                 atomic_read(&sbi->s_bal_2orders),
    2709                 :            :                                 atomic_read(&sbi->s_bal_breaks),
    2710                 :            :                                 atomic_read(&sbi->s_mb_lost_chunks));
    2711                 :          0 :                 ext4_msg(sb, KERN_INFO,
    2712                 :            :                        "mballoc: %lu generated and it took %Lu",
    2713                 :            :                                 sbi->s_mb_buddies_generated,
    2714                 :            :                                 sbi->s_mb_generation_time);
    2715                 :          0 :                 ext4_msg(sb, KERN_INFO,
    2716                 :            :                        "mballoc: %u preallocated, %u discarded",
    2717                 :            :                                 atomic_read(&sbi->s_mb_preallocated),
    2718                 :            :                                 atomic_read(&sbi->s_mb_discarded));
    2719                 :            :         }
    2720                 :            : 
    2721                 :          0 :         free_percpu(sbi->s_locality_groups);
    2722                 :            : 
    2723                 :          0 :         return 0;
    2724                 :            : }
    2725                 :            : 
    2726                 :          0 : static inline int ext4_issue_discard(struct super_block *sb,
    2727                 :            :                 ext4_group_t block_group, ext4_grpblk_t cluster, int count)
    2728                 :            : {
    2729                 :            :         ext4_fsblk_t discard_block;
    2730                 :            : 
    2731                 :          0 :         discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
    2732                 :            :                          ext4_group_first_block_no(sb, block_group));
    2733                 :          0 :         count = EXT4_C2B(EXT4_SB(sb), count);
    2734                 :          0 :         trace_ext4_discard_blocks(sb,
    2735                 :            :                         (unsigned long long) discard_block, count);
    2736                 :            :         return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
    2737                 :            : }
    2738                 :            : 
    2739                 :            : /*
    2740                 :            :  * This function is called by the jbd2 layer once the commit has finished,
    2741                 :            :  * so we know we can free the blocks that were released with that commit.
    2742                 :            :  */
    2743                 :          0 : static void ext4_free_data_callback(struct super_block *sb,
    2744                 :            :                                     struct ext4_journal_cb_entry *jce,
    2745                 :            :                                     int rc)
    2746                 :            : {
    2747                 :            :         struct ext4_free_data *entry = (struct ext4_free_data *)jce;
    2748                 :            :         struct ext4_buddy e4b;
    2749                 :            :         struct ext4_group_info *db;
    2750                 :            :         int err, count = 0, count2 = 0;
    2751                 :            : 
    2752                 :            :         mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
    2753                 :            :                  entry->efd_count, entry->efd_group, entry);
    2754                 :            : 
    2755         [ -  + ]:      70590 :         if (test_opt(sb, DISCARD)) {
    2756                 :          0 :                 err = ext4_issue_discard(sb, entry->efd_group,
    2757                 :            :                                          entry->efd_start_cluster,
    2758                 :            :                                          entry->efd_count);
    2759         [ #  # ]:          0 :                 if (err && err != -EOPNOTSUPP)
    2760                 :          0 :                         ext4_msg(sb, KERN_WARNING, "discard request in"
    2761                 :            :                                  " group:%d block:%d count:%d failed"
    2762                 :            :                                  " with %d", entry->efd_group,
    2763                 :            :                                  entry->efd_start_cluster,
    2764                 :            :                                  entry->efd_count, err);
    2765                 :            :         }
    2766                 :            : 
    2767                 :      70590 :         err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
    2768                 :            :         /* we expect to find existing buddy because it's pinned */
    2769         [ -  + ]:      70590 :         BUG_ON(err != 0);
    2770                 :            : 
    2771                 :            : 
    2772                 :      70590 :         db = e4b.bd_info;
    2773                 :            :         /* there are blocks to put in buddy to make them really free */
    2774                 :            :         count += entry->efd_count;
    2775                 :            :         count2++;
    2776                 :      70590 :         ext4_lock_group(sb, entry->efd_group);
    2777                 :            :         /* Take it out of per group rb tree */
    2778                 :      70590 :         rb_erase(&entry->efd_node, &(db->bb_free_root));
    2779                 :      70590 :         mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
    2780                 :            : 
    2781                 :            :         /*
    2782                 :            :          * Clear the trimmed flag for the group so that the next
    2783                 :            :          * ext4_trim_fs can trim it.
    2784                 :            :          * If the volume is mounted with -o discard, online discard
    2785                 :            :          * is supported and the free blocks will be trimmed online.
    2786                 :            :          */
    2787         [ +  - ]:      70590 :         if (!test_opt(sb, DISCARD))
    2788                 :      70590 :                 EXT4_MB_GRP_CLEAR_TRIMMED(db);
    2789                 :            : 
    2790         [ +  + ]:      70590 :         if (!db->bb_free_root.rb_node) {
    2791                 :            :                 /* No more items in the per group rb tree
    2792                 :            :                  * balance refcounts from ext4_mb_free_metadata()
    2793                 :            :                  */
    2794                 :       3477 :                 page_cache_release(e4b.bd_buddy_page);
    2795                 :       3477 :                 page_cache_release(e4b.bd_bitmap_page);
    2796                 :            :         }
    2797                 :      70590 :         ext4_unlock_group(sb, entry->efd_group);
    2798                 :      70590 :         kmem_cache_free(ext4_free_data_cachep, entry);
    2799                 :      70590 :         ext4_mb_unload_buddy(&e4b);
    2800                 :            : 
    2801                 :            :         mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
    2802                 :      70590 : }
    2803                 :            : 
    2804                 :          0 : int __init ext4_init_mballoc(void)
    2805                 :            : {
    2806                 :          0 :         ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
    2807                 :            :                                         SLAB_RECLAIM_ACCOUNT);
    2808         [ #  # ]:          0 :         if (ext4_pspace_cachep == NULL)
    2809                 :            :                 return -ENOMEM;
    2810                 :            : 
    2811                 :          0 :         ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
    2812                 :            :                                     SLAB_RECLAIM_ACCOUNT);
    2813         [ #  # ]:          0 :         if (ext4_ac_cachep == NULL) {
    2814                 :          0 :                 kmem_cache_destroy(ext4_pspace_cachep);
    2815                 :          0 :                 return -ENOMEM;
    2816                 :            :         }
    2817                 :            : 
    2818                 :          0 :         ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
    2819                 :            :                                            SLAB_RECLAIM_ACCOUNT);
    2820         [ #  # ]:          0 :         if (ext4_free_data_cachep == NULL) {
    2821                 :          0 :                 kmem_cache_destroy(ext4_pspace_cachep);
    2822                 :          0 :                 kmem_cache_destroy(ext4_ac_cachep);
    2823                 :          0 :                 return -ENOMEM;
    2824                 :            :         }
    2825                 :            :         return 0;
    2826                 :            : }
    2827                 :            : 
    2828                 :          0 : void ext4_exit_mballoc(void)
    2829                 :            : {
    2830                 :            :         /*
    2831                 :            :          * Wait for completion of call_rcu()'s on ext4_pspace_cachep
    2832                 :            :          * before destroying the slab cache.
    2833                 :            :          */
    2834                 :          0 :         rcu_barrier();
    2835                 :          0 :         kmem_cache_destroy(ext4_pspace_cachep);
    2836                 :          0 :         kmem_cache_destroy(ext4_ac_cachep);
    2837                 :          0 :         kmem_cache_destroy(ext4_free_data_cachep);
    2838                 :          0 :         ext4_groupinfo_destroy_slabs();
    2839                 :          0 : }
    2840                 :            : 
    2841                 :            : 
    2842                 :            : /*
    2843                 :            :  * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
    2844                 :            :  * Returns 0 if success or error code
    2845                 :            :  */
    2846                 :            : static noinline_for_stack int
    2847                 :          0 : ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
    2848                 :            :                                 handle_t *handle, unsigned int reserv_clstrs)
    2849                 :            : {
    2850                 :            :         struct buffer_head *bitmap_bh = NULL;
    2851                 :            :         struct ext4_group_desc *gdp;
    2852                 :            :         struct buffer_head *gdp_bh;
    2853                 :            :         struct ext4_sb_info *sbi;
    2854                 :     217530 :         struct super_block *sb;
    2855                 :            :         ext4_fsblk_t block;
    2856                 :            :         int err, len;
    2857                 :            : 
    2858         [ -  + ]:     217530 :         BUG_ON(ac->ac_status != AC_STATUS_FOUND);
    2859         [ -  + ]:     217530 :         BUG_ON(ac->ac_b_ex.fe_len <= 0);
    2860                 :            : 
    2861                 :     217530 :         sb = ac->ac_sb;
    2862                 :            :         sbi = EXT4_SB(sb);
    2863                 :            : 
    2864                 :            :         err = -EIO;
    2865                 :     217530 :         bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
    2866         [ +  - ]:     217535 :         if (!bitmap_bh)
    2867                 :            :                 goto out_err;
    2868                 :            : 
    2869                 :     217535 :         err = ext4_journal_get_write_access(handle, bitmap_bh);
    2870         [ +  + ]:     217538 :         if (err)
    2871                 :            :                 goto out_err;
    2872                 :            : 
    2873                 :            :         err = -EIO;
    2874                 :     217535 :         gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
    2875            [ + ]:     217538 :         if (!gdp)
    2876                 :            :                 goto out_err;
    2877                 :            : 
    2878                 :     217539 :         ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
    2879                 :            :                         ext4_free_group_clusters(sb, gdp));
    2880                 :            : 
    2881                 :     217534 :         err = ext4_journal_get_write_access(handle, gdp_bh);
    2882         [ +  - ]:     217541 :         if (err)
    2883                 :            :                 goto out_err;
    2884                 :            : 
    2885                 :     217541 :         block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
    2886                 :            : 
    2887                 :     217541 :         len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
    2888         [ -  + ]:     217541 :         if (!ext4_data_block_valid(sbi, block, len)) {
    2889                 :          0 :                 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
    2890                 :            :                            "fs metadata", block, block+len);
    2891                 :            :                 /* File system mounted not to panic on error
    2892                 :            :                  * Fix the bitmap and repeat the block allocation
    2893                 :            :                  * We leak some of the blocks here.
    2894                 :            :                  */
    2895                 :          0 :                 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
    2896                 :          0 :                 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
    2897                 :            :                               ac->ac_b_ex.fe_len);
    2898                 :          0 :                 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
    2899                 :          0 :                 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
    2900         [ #  # ]:          0 :                 if (!err)
    2901                 :            :                         err = -EAGAIN;
    2902                 :            :                 goto out_err;
    2903                 :            :         }
    2904                 :            : 
    2905                 :     217539 :         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
    2906                 :            : #ifdef AGGRESSIVE_CHECK
    2907                 :            :         {
    2908                 :            :                 int i;
    2909                 :            :                 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
    2910                 :            :                         BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
    2911                 :            :                                                 bitmap_bh->b_data));
    2912                 :            :                 }
    2913                 :            :         }
    2914                 :            : #endif
    2915                 :     217540 :         ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
    2916                 :            :                       ac->ac_b_ex.fe_len);
    2917         [ +  + ]:     217541 :         if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
    2918                 :         37 :                 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
    2919                 :         37 :                 ext4_free_group_clusters_set(sb, gdp,
    2920                 :            :                                              ext4_free_clusters_after_init(sb,
    2921                 :            :                                                 ac->ac_b_ex.fe_group, gdp));
    2922                 :            :         }
    2923                 :     217541 :         len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
    2924                 :     217541 :         ext4_free_group_clusters_set(sb, gdp, len);
    2925                 :     217537 :         ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
    2926                 :     217536 :         ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
    2927                 :            : 
    2928                 :     217539 :         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
    2929                 :     217536 :         percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
    2930                 :            :         /*
    2931                 :            :          * Now reduce the dirty block count also. Should not go negative
    2932                 :            :          */
    2933         [ +  + ]:     217540 :         if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
    2934                 :            :                 /* release all the reserved blocks if non delalloc */
    2935                 :     119386 :                 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
    2936                 :            :                                    reserv_clstrs);
    2937                 :            : 
    2938         [ +  - ]:     217536 :         if (sbi->s_log_groups_per_flex) {
    2939                 :     217536 :                 ext4_group_t flex_group = ext4_flex_group(sbi,
    2940                 :            :                                                           ac->ac_b_ex.fe_group);
    2941                 :     435072 :                 atomic64_sub(ac->ac_b_ex.fe_len,
    2942                 :     217536 :                              &sbi->s_flex_groups[flex_group].free_clusters);
    2943                 :            :         }
    2944                 :            : 
    2945                 :     217540 :         err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
    2946         [ +  - ]:     217541 :         if (err)
    2947                 :            :                 goto out_err;
    2948                 :     217541 :         err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
    2949                 :            : 
    2950                 :            : out_err:
    2951                 :            :         brelse(bitmap_bh);
    2952                 :     217540 :         return err;
    2953                 :            : }
    2954                 :            : 
    2955                 :            : /*
    2956                 :            :  * here we normalize request for locality group
    2957                 :            :  * Group request are normalized to s_mb_group_prealloc, which goes to
    2958                 :            :  * s_strip if we set the same via mount option.
    2959                 :            :  * s_mb_group_prealloc can be configured via
    2960                 :            :  * /sys/fs/ext4/<partition>/mb_group_prealloc
    2961                 :            :  *
    2962                 :            :  * XXX: should we try to preallocate more than the group has now?
    2963                 :            :  */
    2964                 :          0 : static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
    2965                 :            : {
    2966                 :         40 :         struct super_block *sb = ac->ac_sb;
    2967                 :         20 :         struct ext4_locality_group *lg = ac->ac_lg;
    2968                 :            : 
    2969         [ -  + ]:         20 :         BUG_ON(lg == NULL);
    2970                 :         20 :         ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
    2971                 :            :         mb_debug(1, "#%u: goal %u blocks for locality group\n",
    2972                 :            :                 current->pid, ac->ac_g_ex.fe_len);
    2973                 :         20 : }
    2974                 :            : 
    2975                 :            : /*
    2976                 :            :  * Normalization means making request better in terms of
    2977                 :            :  * size and alignment
    2978                 :            :  */
    2979                 :            : static noinline_for_stack void
    2980                 :          0 : ext4_mb_normalize_request(struct ext4_allocation_context *ac,
    2981                 :            :                                 struct ext4_allocation_request *ar)
    2982                 :            : {
    2983                 :     352724 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    2984                 :            :         int bsbits, max;
    2985                 :            :         ext4_lblk_t end;
    2986                 :            :         loff_t size, start_off;
    2987                 :            :         loff_t orig_size __maybe_unused;
    2988                 :            :         ext4_lblk_t start;
    2989                 :      88751 :         struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
    2990                 :            :         struct ext4_prealloc_space *pa;
    2991                 :            : 
    2992                 :            :         /* do normalize only data requests, metadata requests
    2993                 :            :            do not need preallocation */
    2994         [ +  + ]:      88751 :         if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
    2995                 :            :                 return;
    2996                 :            : 
    2997                 :            :         /* sometime caller may want exact blocks */
    2998         [ +  + ]:      38802 :         if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
    2999                 :            :                 return;
    3000                 :            : 
    3001                 :            :         /* caller may indicate that preallocation isn't
    3002                 :            :          * required (it's a tail, for example) */
    3003         [ +  + ]:      38798 :         if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
    3004                 :            :                 return;
    3005                 :            : 
    3006         [ +  + ]:      25261 :         if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
    3007                 :         20 :                 ext4_mb_normalize_group_request(ac);
    3008                 :         20 :                 return ;
    3009                 :            :         }
    3010                 :            : 
    3011                 :      25241 :         bsbits = ac->ac_sb->s_blocksize_bits;
    3012                 :            : 
    3013                 :            :         /* first, let's learn actual file size
    3014                 :            :          * given current request is allocated */
    3015                 :      25241 :         size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
    3016                 :      25256 :         size = size << bsbits;
    3017         [ +  + ]:      25241 :         if (size < i_size_read(ac->ac_inode))
    3018                 :      24411 :                 size = i_size_read(ac->ac_inode);
    3019                 :            :         orig_size = size;
    3020                 :            : 
    3021                 :            :         /* max size of free chunks */
    3022                 :      25240 :         max = 2 << bsbits;
    3023                 :            : 
    3024                 :            : #define NRL_CHECK_SIZE(req, size, max, chunk_size)      \
    3025                 :            :                 (req <= (size) || max <= (chunk_size))
    3026                 :            : 
    3027                 :            :         /* first, try to predict filesize */
    3028                 :            :         /* XXX: should this table be tunable? */
    3029                 :            :         start_off = 0;
    3030         [ +  - ]:     113991 :         if (size <= 16 * 1024) {
    3031                 :            :                 size = 16 * 1024;
    3032         [ +  + ]:      25240 :         } else if (size <= 32 * 1024) {
    3033                 :            :                 size = 32 * 1024;
    3034            [ + ]:      25235 :         } else if (size <= 64 * 1024) {
    3035                 :            :                 size = 64 * 1024;
    3036         [ +  + ]:      25239 :         } else if (size <= 128 * 1024) {
    3037                 :            :                 size = 128 * 1024;
    3038         [ +  + ]:      25153 :         } else if (size <= 256 * 1024) {
    3039                 :            :                 size = 256 * 1024;
    3040         [ +  + ]:      25065 :         } else if (size <= 512 * 1024) {
    3041                 :            :                 size = 512 * 1024;
    3042         [ +  + ]:      24747 :         } else if (size <= 1024 * 1024) {
    3043                 :            :                 size = 1024 * 1024;
    3044         [ +  + ]:        599 :         } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
    3045                 :         70 :                 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
    3046                 :         70 :                                                 (21 - bsbits)) << 21;
    3047                 :            :                 size = 2 * 1024 * 1024;
    3048         [ +  + ]:        529 :         } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
    3049                 :         21 :                 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
    3050                 :         21 :                                                         (22 - bsbits)) << 22;
    3051                 :            :                 size = 4 * 1024 * 1024;
    3052 [ -  + ][ #  # ]:        508 :         } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
    3053                 :            :                                         (8<<20)>>bsbits, max, 8 * 1024)) {
    3054                 :       1016 :                 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
    3055                 :        508 :                                                         (23 - bsbits)) << 23;
    3056                 :        508 :                 size = 8 * 1024 * 1024;
    3057                 :            :         } else {
    3058                 :          0 :                 start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
    3059                 :          0 :                 size      = ac->ac_o_ex.fe_len << bsbits;
    3060                 :            :         }
    3061                 :      25240 :         size = size >> bsbits;
    3062                 :      25240 :         start = start_off >> bsbits;
    3063                 :            : 
    3064                 :            :         /* don't cover already allocated blocks in selected range */
    3065 [ +  + ][ +  + ]:      25240 :         if (ar->pleft && start <= ar->lleft) {
    3066                 :      23863 :                 size -= ar->lleft + 1 - start;
    3067                 :      23863 :                 start = ar->lleft + 1;
    3068                 :            :         }
    3069 [ +  + ][ +  + ]:      25240 :         if (ar->pright && start + size - 1 >= ar->lright)
    3070                 :      20883 :                 size -= start + size - ar->lright;
    3071                 :            : 
    3072                 :      25240 :         end = start + size;
    3073                 :            : 
    3074                 :            :         /* check we don't cross already preallocated blocks */
    3075                 :            :         rcu_read_lock();
    3076         [ +  + ]:     144613 :         list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
    3077                 :            :                 ext4_lblk_t pa_end;
    3078                 :            : 
    3079         [ -  + ]:     119372 :                 if (pa->pa_deleted)
    3080                 :          0 :                         continue;
    3081                 :            :                 spin_lock(&pa->pa_lock);
    3082         [ -  + ]:     119360 :                 if (pa->pa_deleted) {
    3083                 :            :                         spin_unlock(&pa->pa_lock);
    3084                 :          0 :                         continue;
    3085                 :            :                 }
    3086                 :            : 
    3087                 :     238720 :                 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
    3088                 :            :                                                   pa->pa_len);
    3089                 :            : 
    3090                 :            :                 /* PA must not overlap original request */
    3091 [ +  + ][ -  + ]:     119360 :                 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
    3092                 :            :                         ac->ac_o_ex.fe_logical < pa->pa_lstart));
    3093                 :            : 
    3094                 :            :                 /* skip PAs this normalized request doesn't overlap with */
    3095 [ +  + ][ +  + ]:     119360 :                 if (pa->pa_lstart >= end || pa_end <= start) {
    3096                 :            :                         spin_unlock(&pa->pa_lock);
    3097                 :     119309 :                         continue;
    3098                 :            :                 }
    3099 [ +  + ][ -  + ]:         65 :                 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
    3100                 :            : 
    3101                 :            :                 /* adjust start or end to be adjacent to this pa */
    3102         [ +  + ]:         65 :                 if (pa_end <= ac->ac_o_ex.fe_logical) {
    3103         [ -  + ]:         63 :                         BUG_ON(pa_end < start);
    3104                 :            :                         start = pa_end;
    3105         [ +  - ]:          2 :                 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
    3106         [ -  + ]:          2 :                         BUG_ON(pa->pa_lstart > end);
    3107                 :            :                         end = pa->pa_lstart;
    3108                 :            :                 }
    3109                 :            :                 spin_unlock(&pa->pa_lock);
    3110                 :            :         }
    3111                 :            :         rcu_read_unlock();
    3112                 :      25241 :         size = end - start;
    3113                 :            : 
    3114                 :            :         /* XXX: extra loop to check we really don't overlap preallocations */
    3115                 :            :         rcu_read_lock();
    3116         [ +  + ]:     144612 :         list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
    3117                 :            :                 ext4_lblk_t pa_end;
    3118                 :            : 
    3119                 :            :                 spin_lock(&pa->pa_lock);
    3120         [ +  + ]:     119373 :                 if (pa->pa_deleted == 0) {
    3121                 :     238744 :                         pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
    3122                 :            :                                                           pa->pa_len);
    3123 [ +  + ][ -  + ]:     119372 :                         BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
    3124                 :            :                 }
    3125                 :            :                 spin_unlock(&pa->pa_lock);
    3126                 :            :         }
    3127                 :            :         rcu_read_unlock();
    3128                 :            : 
    3129 [ -  + ][ #  # ]:      25241 :         if (start + size <= ac->ac_o_ex.fe_logical &&
    3130                 :            :                         start > ac->ac_o_ex.fe_logical) {
    3131                 :          0 :                 ext4_msg(ac->ac_sb, KERN_ERR,
    3132                 :            :                          "start %lu, size %lu, fe_logical %lu",
    3133                 :            :                          (unsigned long) start, (unsigned long) size,
    3134                 :            :                          (unsigned long) ac->ac_o_ex.fe_logical);
    3135                 :            :         }
    3136 [ -  + ][ #  # ]:      25241 :         BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
    3137                 :            :                         start > ac->ac_o_ex.fe_logical);
    3138 [ +  - ][ -  + ]:      25241 :         BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
    3139                 :            : 
    3140                 :            :         /* now prepare goal request */
    3141                 :            : 
    3142                 :            :         /* XXX: is it better to align blocks WRT to logical
    3143                 :            :          * placement or satisfy big request as is */
    3144                 :      25241 :         ac->ac_g_ex.fe_logical = start;
    3145                 :      25241 :         ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
    3146                 :            : 
    3147                 :            :         /* define goal start in order to merge */
    3148 [ +  + ][ +  + ]:      25241 :         if (ar->pright && (ar->lright == (start + size))) {
    3149                 :            :                 /* merge to the right */
    3150                 :      20886 :                 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
    3151                 :            :                                                 &ac->ac_f_ex.fe_group,
    3152                 :            :                                                 &ac->ac_f_ex.fe_start);
    3153                 :      20887 :                 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
    3154                 :            :         }
    3155 [ +  + ][ +  + ]:      25242 :         if (ar->pleft && (ar->lleft + 1 == start)) {
    3156                 :            :                 /* merge to the left */
    3157                 :      24082 :                 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
    3158                 :            :                                                 &ac->ac_f_ex.fe_group,
    3159                 :            :                                                 &ac->ac_f_ex.fe_start);
    3160                 :      24081 :                 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
    3161                 :            :         }
    3162                 :            : 
    3163                 :            :         mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
    3164                 :            :                 (unsigned) orig_size, (unsigned) start);
    3165                 :            : }
    3166                 :            : 
    3167                 :          0 : static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
    3168                 :            : {
    3169                 :     217536 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    3170                 :            : 
    3171 [ -  + ][ #  # ]:     217536 :         if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
    3172                 :          0 :                 atomic_inc(&sbi->s_bal_reqs);
    3173                 :          0 :                 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
    3174         [ #  # ]:          0 :                 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
    3175                 :          0 :                         atomic_inc(&sbi->s_bal_success);
    3176                 :          0 :                 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
    3177 [ #  # ][ #  # ]:          0 :                 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
    3178                 :          0 :                                 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
    3179                 :          0 :                         atomic_inc(&sbi->s_bal_goals);
    3180            [ - ]:          0 :                 if (ac->ac_found > sbi->s_mb_max_to_scan)
    3181                 :          0 :                         atomic_inc(&sbi->s_bal_breaks);
    3182                 :            :         }
    3183                 :            : 
    3184         [ +  + ]:     217529 :         if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
    3185                 :            :                 trace_ext4_mballoc_alloc(ac);
    3186                 :            :         else
    3187                 :            :                 trace_ext4_mballoc_prealloc(ac);
    3188                 :          0 : }
    3189                 :            : 
    3190                 :            : /*
    3191                 :            :  * Called on failure; free up any blocks from the inode PA for this
    3192                 :            :  * context.  We don't need this for MB_GROUP_PA because we only change
    3193                 :            :  * pa_free in ext4_mb_release_context(), but on failure, we've already
    3194                 :            :  * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
    3195                 :            :  */
    3196                 :            : static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
    3197                 :            : {
    3198                 :            :         struct ext4_prealloc_space *pa = ac->ac_pa;
    3199                 :            : 
    3200 [ #  # ][ #  # ]:          0 :         if (pa && pa->pa_type == MB_INODE_PA)
         [ #  # ][ #  # ]
    3201                 :          0 :                 pa->pa_free += ac->ac_b_ex.fe_len;
    3202                 :            : }
    3203                 :            : 
    3204                 :            : /*
    3205                 :            :  * use blocks preallocated to inode
    3206                 :            :  */
    3207                 :          0 : static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
    3208                 :            :                                 struct ext4_prealloc_space *pa)
    3209                 :            : {
    3210                 :     146807 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    3211                 :            :         ext4_fsblk_t start;
    3212                 :            :         ext4_fsblk_t end;
    3213                 :            :         int len;
    3214                 :            : 
    3215                 :            :         /* found preallocated blocks, use them */
    3216                 :     146807 :         start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
    3217                 :     146807 :         end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
    3218                 :            :                   start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
    3219                 :     146807 :         len = EXT4_NUM_B2C(sbi, end - start);
    3220                 :     146807 :         ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
    3221                 :            :                                         &ac->ac_b_ex.fe_start);
    3222                 :     293611 :         ac->ac_b_ex.fe_len = len;
    3223                 :     293611 :         ac->ac_status = AC_STATUS_FOUND;
    3224                 :     293611 :         ac->ac_pa = pa;
    3225                 :            : 
    3226         [ -  + ]:     293611 :         BUG_ON(start < pa->pa_pstart);
    3227         [ -  + ]:     146804 :         BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
    3228         [ -  + ]:     146804 :         BUG_ON(pa->pa_free < len);
    3229                 :     146804 :         pa->pa_free -= len;
    3230                 :            : 
    3231                 :            :         mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
    3232                 :     146804 : }
    3233                 :            : 
    3234                 :            : /*
    3235                 :            :  * use blocks preallocated to locality group
    3236                 :            :  */
    3237                 :          0 : static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
    3238                 :            :                                 struct ext4_prealloc_space *pa)
    3239                 :            : {
    3240                 :       4230 :         unsigned int len = ac->ac_o_ex.fe_len;
    3241                 :            : 
    3242                 :       4230 :         ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
    3243                 :            :                                         &ac->ac_b_ex.fe_group,
    3244                 :            :                                         &ac->ac_b_ex.fe_start);
    3245                 :       4230 :         ac->ac_b_ex.fe_len = len;
    3246                 :       4230 :         ac->ac_status = AC_STATUS_FOUND;
    3247                 :       4230 :         ac->ac_pa = pa;
    3248                 :            : 
    3249                 :            :         /* we don't correct pa_pstart or pa_plen here to avoid
    3250                 :            :          * possible race when the group is being loaded concurrently
    3251                 :            :          * instead we correct pa later, after blocks are marked
    3252                 :            :          * in on-disk bitmap -- see ext4_mb_release_context()
    3253                 :            :          * Other CPUs are prevented from allocating from this pa by lg_mutex
    3254                 :            :          */
    3255                 :            :         mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
    3256                 :       4230 : }
    3257                 :            : 
    3258                 :            : /*
    3259                 :            :  * Return the prealloc space that have minimal distance
    3260                 :            :  * from the goal block. @cpa is the prealloc
    3261                 :            :  * space that is having currently known minimal distance
    3262                 :            :  * from the goal block.
    3263                 :            :  */
    3264                 :            : static struct ext4_prealloc_space *
    3265                 :          0 : ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
    3266                 :            :                         struct ext4_prealloc_space *pa,
    3267                 :            :                         struct ext4_prealloc_space *cpa)
    3268                 :            : {
    3269                 :            :         ext4_fsblk_t cur_distance, new_distance;
    3270                 :            : 
    3271         [ +  + ]:       4473 :         if (cpa == NULL) {
    3272                 :       4210 :                 atomic_inc(&pa->pa_count);
    3273                 :       4210 :                 return pa;
    3274                 :            :         }
    3275                 :        263 :         cur_distance = abs(goal_block - cpa->pa_pstart);
    3276                 :        263 :         new_distance = abs(goal_block - pa->pa_pstart);
    3277                 :            : 
    3278         [ +  + ]:        263 :         if (cur_distance <= new_distance)
    3279                 :            :                 return cpa;
    3280                 :            : 
    3281                 :            :         /* drop the previous reference */
    3282                 :        204 :         atomic_dec(&cpa->pa_count);
    3283                 :        204 :         atomic_inc(&pa->pa_count);
    3284                 :        204 :         return pa;
    3285                 :            : }
    3286                 :            : 
    3287                 :            : /*
    3288                 :            :  * search goal blocks in preallocated space
    3289                 :            :  */
    3290                 :            : static noinline_for_stack int
    3291                 :          0 : ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
    3292                 :            : {
    3293                 :     217520 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    3294                 :            :         int order, i;
    3295                 :     217520 :         struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
    3296                 :            :         struct ext4_locality_group *lg;
    3297                 :            :         struct ext4_prealloc_space *pa, *cpa = NULL;
    3298                 :            :         ext4_fsblk_t goal_block;
    3299                 :            : 
    3300                 :            :         /* only data can be preallocated */
    3301         [ +  + ]:     217520 :         if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
    3302                 :            :                 return 0;
    3303                 :            : 
    3304                 :            :         /* first, try per-file preallocation */
    3305                 :            :         rcu_read_lock();
    3306         [ +  + ]:     342031 :         list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
    3307                 :            : 
    3308                 :            :                 /* all fields in this condition don't change,
    3309                 :            :                  * so we can skip locking for them */
    3310 [ +  + ][ +  + ]:     299018 :                 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
    3311                 :     259029 :                     ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
    3312                 :     259029 :                                                EXT4_C2B(sbi, pa->pa_len)))
    3313                 :     174471 :                         continue;
    3314                 :            : 
    3315                 :            :                 /* non-extent files can't have physical blocks past 2^32 */
    3316 [ -  + ][ #  # ]:     124547 :                 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
    3317                 :          0 :                     (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
    3318                 :            :                      EXT4_MAX_BLOCK_FILE_PHYS))
    3319                 :          0 :                         continue;
    3320                 :            : 
    3321                 :            :                 /* found preallocated blocks, use them */
    3322                 :            :                 spin_lock(&pa->pa_lock);
    3323    [ + ][ +  + ]:     124574 :                 if (pa->pa_deleted == 0 && pa->pa_free) {
    3324                 :     124573 :                         atomic_inc(&pa->pa_count);
    3325                 :     124571 :                         ext4_mb_use_inode_pa(ac, pa);
    3326                 :            :                         spin_unlock(&pa->pa_lock);
    3327                 :     124576 :                         ac->ac_criteria = 10;
    3328                 :            :                         rcu_read_unlock();
    3329                 :     124576 :                         return 1;
    3330                 :            :                 }
    3331                 :            :                 spin_unlock(&pa->pa_lock);
    3332                 :            :         }
    3333                 :            :         rcu_read_unlock();
    3334                 :            : 
    3335                 :            :         /* can we use group allocation? */
    3336         [ +  + ]:      43012 :         if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
    3337                 :            :                 return 0;
    3338                 :            : 
    3339                 :            :         /* inode may have no locality group for some reason */
    3340                 :       4230 :         lg = ac->ac_lg;
    3341            [ + ]:       4230 :         if (lg == NULL)
    3342                 :            :                 return 0;
    3343                 :     225980 :         order  = fls(ac->ac_o_ex.fe_len) - 1;
    3344         [ -  + ]:       4230 :         if (order > PREALLOC_TB_SIZE - 1)
    3345                 :            :                 /* The max size of hash table is PREALLOC_TB_SIZE */
    3346                 :            :                 order = PREALLOC_TB_SIZE - 1;
    3347                 :            : 
    3348                 :       4230 :         goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
    3349                 :            :         /*
    3350                 :            :          * search for the prealloc space that has the
    3351                 :            :          * minimal distance from the goal block.
    3352                 :            :          */
    3353         [ +  + ]:      43533 :         for (i = order; i < PREALLOC_TB_SIZE; i++) {
    3354                 :            :                 rcu_read_lock();
    3355         [ +  + ]:      43952 :                 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
    3356                 :            :                                         pa_inode_list) {
    3357                 :            :                         spin_lock(&pa->pa_lock);
    3358 [ +  - ][ +  + ]:       4649 :                         if (pa->pa_deleted == 0 &&
    3359                 :       4649 :                                         pa->pa_free >= ac->ac_o_ex.fe_len) {
    3360                 :            : 
    3361                 :       4473 :                                 cpa = ext4_mb_check_group_pa(goal_block,
    3362                 :            :                                                                 pa, cpa);
    3363                 :            :                         }
    3364                 :            :                         spin_unlock(&pa->pa_lock);
    3365                 :            :                 }
    3366                 :            :                 rcu_read_unlock();
    3367                 :            :         }
    3368         [ +  + ]:       4230 :         if (cpa) {
    3369                 :       4210 :                 ext4_mb_use_group_pa(ac, cpa);
    3370                 :       4210 :                 ac->ac_criteria = 20;
    3371                 :       4210 :                 return 1;
    3372                 :            :         }
    3373                 :            :         return 0;
    3374                 :            : }
    3375                 :            : 
    3376                 :            : /*
    3377                 :            :  * the function goes through all blocks freed in the group
    3378                 :            :  * but not yet committed and marks them used in the in-core bitmap.
    3379                 :            :  * buddy must be generated from this bitmap.
    3380                 :            :  * Needs to be called with the ext4 group lock held
    3381                 :            :  */
    3382                 :          0 : static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
    3383                 :            :                                                 ext4_group_t group)
    3384                 :            : {
    3385                 :            :         struct rb_node *n;
    3386                 :            :         struct ext4_group_info *grp;
    3387                 :            :         struct ext4_free_data *entry;
    3388                 :            : 
    3389                 :            :         grp = ext4_get_group_info(sb, group);
    3390                 :        131 :         n = rb_first(&(grp->bb_free_root));
    3391                 :            : 
    3392         [ -  + ]:        131 :         while (n) {
    3393                 :            :                 entry = rb_entry(n, struct ext4_free_data, efd_node);
    3394                 :          0 :                 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
    3395                 :          0 :                 n = rb_next(n);
    3396                 :            :         }
    3397                 :        131 :         return;
    3398                 :            : }
    3399                 :            : 
    3400                 :            : /*
    3401                 :            :  * the function goes through all preallocations in this group and marks them
    3402                 :            :  * used in the in-core bitmap. buddy must be generated from this bitmap.
    3403                 :            :  * Needs to be called with the ext4 group lock held
    3404                 :            :  */
    3405                 :            : static noinline_for_stack
    3406                 :          0 : void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
    3407                 :            :                                         ext4_group_t group)
    3408                 :            : {
    3409                 :            :         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
    3410                 :            :         struct ext4_prealloc_space *pa;
    3411                 :            :         struct list_head *cur;
    3412                 :            :         ext4_group_t groupnr;
    3413                 :            :         ext4_grpblk_t start;
    3414                 :            :         int preallocated = 0;
    3415                 :            :         int len;
    3416                 :            : 
    3417                 :            :         /* all forms of preallocation discard first load the group,
    3418                 :            :          * so the only competing code is preallocation use.
    3419                 :            :          * we don't need any locking here.
    3420                 :            :          * notice we do NOT ignore preallocations with pa_deleted;
    3421                 :            :          * otherwise we could leave used blocks available for
    3422                 :            :          * allocation in buddy when a concurrent ext4_mb_put_pa()
    3423                 :            :          * is dropping a preallocation
    3424                 :            :          */
    3425         [ +  + ]:        139 :         list_for_each(cur, &grp->bb_prealloc_list) {
    3426                 :            :                 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
    3427                 :            :                 spin_lock(&pa->pa_lock);
    3428                 :          8 :                 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
    3429                 :            :                                              &groupnr, &start);
    3430                 :          8 :                 len = pa->pa_len;
    3431                 :            :                 spin_unlock(&pa->pa_lock);
    3432         [ -  + ]:        139 :                 if (unlikely(len == 0))
    3433                 :          0 :                         continue;
    3434         [ -  + ]:          8 :                 BUG_ON(groupnr != group);
    3435                 :          8 :                 ext4_set_bits(bitmap, start, len);
    3436                 :            :                 preallocated += len;
    3437                 :            :         }
    3438                 :            :         mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
    3439                 :        131 : }
    3440                 :            : 
    3441                 :          0 : static void ext4_mb_pa_callback(struct rcu_head *head)
    3442                 :            : {
    3443                 :            :         struct ext4_prealloc_space *pa;
    3444                 :      22218 :         pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
    3445                 :            : 
    3446         [ -  + ]:      22218 :         BUG_ON(atomic_read(&pa->pa_count));
    3447         [ -  + ]:      22218 :         BUG_ON(pa->pa_deleted == 0);
    3448                 :      22218 :         kmem_cache_free(ext4_pspace_cachep, pa);
    3449                 :      22250 : }
    3450                 :            : 
    3451                 :            : /*
    3452                 :            :  * drops a reference to the preallocated space descriptor and, if this was
    3453                 :            :  * the last reference and the space is consumed, unlinks and frees it
    3454                 :            :  */
    3455                 :     151037 : static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
    3456                 :            :                         struct super_block *sb, struct ext4_prealloc_space *pa)
    3457                 :            : {
    3458                 :            :         ext4_group_t grp;
    3459                 :            :         ext4_fsblk_t grp_blk;
    3460                 :            : 
    3461                 :            :         /* in this short window concurrent discard can set pa_deleted */
    3462                 :            :         spin_lock(&pa->pa_lock);
    3463 [ +  - ][ +  + ]:     151042 :         if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
    3464                 :            :                 spin_unlock(&pa->pa_lock);
    3465                 :            :                 return;
    3466                 :            :         }
    3467                 :            : 
    3468         [ -  + ]:        349 :         if (pa->pa_deleted == 1) {
    3469                 :            :                 spin_unlock(&pa->pa_lock);
    3470                 :            :                 return;
    3471                 :            :         }
    3472                 :            : 
    3473                 :        349 :         pa->pa_deleted = 1;
    3474                 :            :         spin_unlock(&pa->pa_lock);
    3475                 :            : 
    3476                 :        349 :         grp_blk = pa->pa_pstart;
    3477                 :            :         /*
    3478                 :            :          * If doing group-based preallocation, pa_pstart may be in the
    3479                 :            :          * next group when pa is used up
    3480                 :            :          */
    3481         [ +  + ]:        349 :         if (pa->pa_type == MB_GROUP_PA)
    3482                 :         20 :                 grp_blk--;
    3483                 :            : 
    3484                 :        349 :         grp = ext4_get_group_number(sb, grp_blk);
    3485                 :            : 
    3486                 :            :         /*
    3487                 :            :          * possible race:
    3488                 :            :          *
    3489                 :            :          *  P1 (buddy init)                     P2 (regular allocation)
    3490                 :            :          *                                      find block B in PA
    3491                 :            :          *  copy on-disk bitmap to buddy
    3492                 :            :          *                                      mark B in on-disk bitmap
    3493                 :            :          *                                      drop PA from group
    3494                 :            :          *  mark all PAs in buddy
    3495                 :            :          *
    3496                 :            :          * thus, P1 initializes buddy with B available. to prevent this
    3497                 :            :          * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
    3498                 :            :          * against that pair
    3499                 :            :          */
    3500                 :            :         ext4_lock_group(sb, grp);
    3501                 :            :         list_del(&pa->pa_group_list);
    3502                 :            :         ext4_unlock_group(sb, grp);
    3503                 :            : 
    3504                 :        349 :         spin_lock(pa->pa_obj_lock);
    3505                 :            :         list_del_rcu(&pa->pa_inode_list);
    3506                 :        349 :         spin_unlock(pa->pa_obj_lock);
    3507                 :            : 
    3508                 :        349 :         call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
    3509                 :            : }
    3510                 :            : 
    3511                 :            : /*
    3512                 :            :  * creates new preallocated space for given inode
    3513                 :            :  */
    3514                 :            : static noinline_for_stack int
    3515                 :          0 : ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
    3516                 :            : {
    3517                 :      22234 :         struct super_block *sb = ac->ac_sb;
    3518                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    3519                 :            :         struct ext4_prealloc_space *pa;
    3520                 :            :         struct ext4_group_info *grp;
    3521                 :            :         struct ext4_inode_info *ei;
    3522                 :            : 
    3523                 :            :         /* preallocate only when found space is larger than requested */
    3524         [ -  + ]:      22234 :         BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
    3525         [ -  + ]:      22234 :         BUG_ON(ac->ac_status != AC_STATUS_FOUND);
    3526         [ -  + ]:      22234 :         BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
    3527                 :            : 
    3528                 :      22234 :         pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
    3529         [ +  - ]:      22236 :         if (pa == NULL)
    3530                 :            :                 return -ENOMEM;
    3531                 :            : 
    3532         [ +  + ]:      22236 :         if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
    3533                 :            :                 int winl;
    3534                 :            :                 int wins;
    3535                 :            :                 int win;
    3536                 :            :                 int offs;
    3537                 :            : 
    3538                 :            :                 /* we can't allocate as much as normalizer wants.
    3539                 :            :                  * so, found space must get proper lstart
    3540                 :            :                  * to cover original request */
    3541         [ -  + ]:         10 :                 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
    3542         [ -  + ]:         10 :                 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
    3543                 :            : 
    3544                 :            :                 /* we're limited by the original request in that its
    3545                 :            :                  * logical block must be covered anyway;
    3546                 :            :                  * winl is the window we can move our chunk within */
    3547                 :         10 :                 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
    3548                 :            : 
    3549                 :            :                 /* also, we should cover whole original request */
    3550                 :         10 :                 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
    3551                 :            : 
    3552                 :            :                 /* the smallest one defines real window */
    3553                 :         10 :                 win = min(winl, wins);
    3554                 :            : 
    3555                 :         10 :                 offs = ac->ac_o_ex.fe_logical %
    3556                 :         10 :                         EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
    3557         [ +  + ]:         10 :                 if (offs && offs < win)
    3558                 :            :                         win = offs;
    3559                 :            : 
    3560                 :         10 :                 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
    3561                 :         10 :                         EXT4_NUM_B2C(sbi, win);
    3562         [ -  + ]:         10 :                 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
    3563         [ -  + ]:         10 :                 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
    3564                 :            :         }
    3565                 :            : 
    3566                 :            :         /* preallocation can change ac_b_ex, thus we store actually
    3567                 :            :          * allocated blocks for history */
    3568                 :      22236 :         ac->ac_f_ex = ac->ac_b_ex;
    3569                 :            : 
    3570                 :      22236 :         pa->pa_lstart = ac->ac_b_ex.fe_logical;
    3571                 :      44472 :         pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
    3572                 :      22236 :         pa->pa_len = ac->ac_b_ex.fe_len;
    3573                 :      22236 :         pa->pa_free = pa->pa_len;
    3574                 :      22236 :         atomic_set(&pa->pa_count, 1);
    3575                 :      22236 :         spin_lock_init(&pa->pa_lock);
    3576                 :      22236 :         INIT_LIST_HEAD(&pa->pa_inode_list);
    3577                 :      22236 :         INIT_LIST_HEAD(&pa->pa_group_list);
    3578                 :      22236 :         pa->pa_deleted = 0;
    3579                 :      22236 :         pa->pa_type = MB_INODE_PA;
    3580                 :            : 
    3581                 :            :         mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
    3582                 :            :                         pa->pa_pstart, pa->pa_len, pa->pa_lstart);
    3583                 :            :         trace_ext4_mb_new_inode_pa(ac, pa);
    3584                 :            : 
    3585                 :          2 :         ext4_mb_use_inode_pa(ac, pa);
    3586                 :      22236 :         atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
    3587                 :            : 
    3588                 :      22236 :         ei = EXT4_I(ac->ac_inode);
    3589                 :      22236 :         grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
    3590                 :            : 
    3591                 :      22236 :         pa->pa_obj_lock = &ei->i_prealloc_lock;
    3592                 :      22236 :         pa->pa_inode = ac->ac_inode;
    3593                 :            : 
    3594                 :      22236 :         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
    3595                 :      22236 :         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
    3596                 :      22236 :         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
    3597                 :            : 
    3598                 :      22236 :         spin_lock(pa->pa_obj_lock);
    3599                 :      22236 :         list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
    3600                 :      22236 :         spin_unlock(pa->pa_obj_lock);
    3601                 :            : 
    3602                 :      22236 :         return 0;
    3603                 :            : }
    3604                 :            : 
    3605                 :            : /*
    3606                 :            :  * creates new preallocated space for the locality group the inode belongs to
    3607                 :            :  */
    3608                 :            : static noinline_for_stack int
    3609                 :          0 : ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
    3610                 :            : {
    3611                 :         40 :         struct super_block *sb = ac->ac_sb;
    3612                 :            :         struct ext4_locality_group *lg;
    3613                 :            :         struct ext4_prealloc_space *pa;
    3614                 :            :         struct ext4_group_info *grp;
    3615                 :            : 
    3616                 :            :         /* preallocate only when found space is larger than requested */
    3617         [ -  + ]:         20 :         BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
    3618         [ -  + ]:         20 :         BUG_ON(ac->ac_status != AC_STATUS_FOUND);
    3619         [ -  + ]:         20 :         BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
    3620                 :            : 
    3621         [ -  + ]:         20 :         BUG_ON(ext4_pspace_cachep == NULL);
    3622                 :         20 :         pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
    3623         [ +  - ]:         20 :         if (pa == NULL)
    3624                 :            :                 return -ENOMEM;
    3625                 :            : 
    3626                 :            :         /* preallocation can change ac_b_ex, thus we store actually
    3627                 :            :          * allocated blocks for history */
    3628                 :         20 :         ac->ac_f_ex = ac->ac_b_ex;
    3629                 :            : 
    3630                 :         40 :         pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
    3631                 :         20 :         pa->pa_lstart = pa->pa_pstart;
    3632                 :         20 :         pa->pa_len = ac->ac_b_ex.fe_len;
    3633                 :         20 :         pa->pa_free = pa->pa_len;
    3634                 :         20 :         atomic_set(&pa->pa_count, 1);
    3635                 :         20 :         spin_lock_init(&pa->pa_lock);
    3636                 :         20 :         INIT_LIST_HEAD(&pa->pa_inode_list);
    3637                 :         20 :         INIT_LIST_HEAD(&pa->pa_group_list);
    3638                 :         20 :         pa->pa_deleted = 0;
    3639                 :         20 :         pa->pa_type = MB_GROUP_PA;
    3640                 :            : 
    3641                 :            :         mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
    3642                 :            :                         pa->pa_pstart, pa->pa_len, pa->pa_lstart);
    3643                 :            :         trace_ext4_mb_new_group_pa(ac, pa);
    3644                 :            : 
    3645                 :         20 :         ext4_mb_use_group_pa(ac, pa);
    3646                 :         40 :         atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
    3647                 :            : 
    3648                 :         20 :         grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
    3649                 :         20 :         lg = ac->ac_lg;
    3650         [ -  + ]:         20 :         BUG_ON(lg == NULL);
    3651                 :            : 
    3652                 :         20 :         pa->pa_obj_lock = &lg->lg_prealloc_lock;
    3653                 :         20 :         pa->pa_inode = NULL;
    3654                 :            : 
    3655                 :         20 :         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
    3656                 :         20 :         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
    3657                 :         20 :         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
    3658                 :            : 
    3659                 :            :         /*
    3660                 :            :          * We will later add the new pa to the right bucket
    3661                 :            :          * after updating the pa_free in ext4_mb_release_context
    3662                 :            :          */
    3663                 :         20 :         return 0;
    3664                 :            : }
    3665                 :            : 
    3666                 :          0 : static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
    3667                 :            : {
    3668                 :            :         int err;
    3669                 :            : 
    3670         [ +  + ]:      22254 :         if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
    3671                 :         20 :                 err = ext4_mb_new_group_pa(ac);
    3672                 :            :         else
    3673                 :      22234 :                 err = ext4_mb_new_inode_pa(ac);
    3674                 :      22256 :         return err;
    3675                 :            : }
    3676                 :            : 
    3677                 :            : /*
    3678                 :            :  * finds all unused blocks in on-disk bitmap, frees them in
    3679                 :            :  * in-core bitmap and buddy.
    3680                 :            :  * @pa must be unlinked from inode and group lists, so that
    3681                 :            :  * nobody else can find/use it.
    3682                 :            :  * the caller MUST hold group/inode locks.
    3683                 :            :  * TODO: optimize the case when there are no in-core structures yet
    3684                 :            :  */
    3685                 :            : static noinline_for_stack int
    3686                 :          0 : ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
    3687                 :            :                         struct ext4_prealloc_space *pa)
    3688                 :            : {
    3689                 :      21908 :         struct super_block *sb = e4b->bd_sb;
    3690                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    3691                 :            :         unsigned int end;
    3692                 :            :         unsigned int next;
    3693                 :            :         ext4_group_t group;
    3694                 :            :         ext4_grpblk_t bit;
    3695                 :            :         unsigned long long grp_blk_start;
    3696                 :            :         int err = 0;
    3697                 :            :         int free = 0;
    3698                 :            : 
    3699         [ -  + ]:      21908 :         BUG_ON(pa->pa_deleted == 0);
    3700                 :      21908 :         ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
    3701                 :      21909 :         grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
    3702 [ -  + ][ #  # ]:      21909 :         BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
    3703                 :      21909 :         end = bit + pa->pa_len;
    3704                 :            : 
    3705         [ +  + ]:     104829 :         while (bit < end) {
    3706                 :     166899 :                 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
    3707         [ +  + ]:      83450 :                 if (bit >= end)
    3708                 :            :                         break;
    3709                 :     165839 :                 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
    3710                 :            :                 mb_debug(1, "    free preallocated %u/%u in group %u\n",
    3711                 :            :                          (unsigned) ext4_group_first_block_no(sb, group) + bit,
    3712                 :            :                          (unsigned) next - bit, (unsigned) group);
    3713                 :      82919 :                 free += next - bit;
    3714                 :            : 
    3715                 :      82919 :                 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
    3716                 :      82914 :                 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
    3717                 :      82914 :                                                     EXT4_C2B(sbi, bit)),
    3718                 :            :                                                next - bit);
    3719                 :      82914 :                 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
    3720                 :      82920 :                 bit = next + 1;
    3721                 :            :         }
    3722         [ -  + ]:      21910 :         if (free != pa->pa_free) {
    3723                 :          0 :                 ext4_msg(e4b->bd_sb, KERN_CRIT,
    3724                 :            :                          "pa %p: logic %lu, phys. %lu, len %lu",
    3725                 :            :                          pa, (unsigned long) pa->pa_lstart,
    3726                 :            :                          (unsigned long) pa->pa_pstart,
    3727                 :            :                          (unsigned long) pa->pa_len);
    3728                 :          0 :                 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
    3729                 :            :                                         free, pa->pa_free);
    3730                 :            :                 /*
    3731                 :            :                  * pa is already deleted so we use the value obtained
    3732                 :            :                  * from the bitmap and continue.
    3733                 :            :                  */
    3734                 :            :         }
    3735                 :          2 :         atomic_add(free, &sbi->s_mb_discarded);
    3736                 :            : 
    3737                 :      21909 :         return err;
    3738                 :            : }
    3739                 :            : 
                                        /*
                                         * Release one group preallocation (PA).
                                         *
                                         * Translates pa->pa_pstart into a (group, offset) pair, hands the
                                         * pa->pa_len blocks starting at that offset to mb_free_blocks() and adds
                                         * pa->pa_len to the s_mb_discarded counter of the superblock.
                                         *
                                         * @e4b: buddy descriptor; unless pa->pa_len is 0, the group computed from
                                         *       pa->pa_pstart must equal e4b->bd_group (BUG_ON otherwise)
                                         * @pa:  preallocation to release; must already have pa_deleted set
                                         *       (BUG_ON otherwise)
                                         *
                                         * The caller visible in this file, ext4_mb_discard_group_preallocations(),
                                         * uses this for PAs of type MB_GROUP_PA and calls it with the group lock
                                         * held.
                                         *
                                         * Always returns 0.
                                         */
    3740                 :            : static noinline_for_stack int
    3741                 :          0 : ext4_mb_release_group_pa(struct ext4_buddy *e4b,
    3742                 :            :                                 struct ext4_prealloc_space *pa)
    3743                 :            : {
    3744                 :          0 :         struct super_block *sb = e4b->bd_sb;
    3745                 :            :         ext4_group_t group;
    3746                 :            :         ext4_grpblk_t bit;
    3747                 :            : 
    3748                 :            :         trace_ext4_mb_release_group_pa(sb, pa);
    3749         [ #  # ]:          0 :         BUG_ON(pa->pa_deleted == 0);
                                                /* split the physical start into group number and in-group offset */
    3750                 :          0 :         ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
                                                /* a non-empty PA must belong to the group whose buddy was passed in */
    3751 [ #  # ][ #  # ]:          0 :         BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
    3752                 :          0 :         mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
    3753                 :          0 :         atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
    3754                 :          0 :         trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
    3755                 :            : 
    3756                 :          0 :         return 0;
    3757                 :            : }
    3758                 :            : 
    3759                 :            : /*
    3760                 :            :  * Releases the currently unused preallocations (PAs) of the given group.
    3761                 :            :  *
    3762                 :            :  * First, we need to decide the discard policy:
    3763                 :            :  * - when do we discard
    3764                 :            :  *   1) ENOSPC
    3765                 :            :  * - how many do we discard
    3766                 :            :  *   1) as many as requested
                                         *
                                         * What the code below actually does: every PA on the group's
                                         * bb_prealloc_list that has pa_count == 0 and is not yet marked
                                         * pa_deleted is selected and released. @needed does not cap the amount
                                         * released; it only decides whether the scan is repeated when some PAs
                                         * were in use (pa_count != 0) and the pa_free total collected so far is
                                         * still below @needed. A @needed of 0 is replaced by
                                         * EXT4_CLUSTERS_PER_GROUP(sb) + 1.
                                         *
                                         * Returns the sum of pa_free over the selected PAs. Returns 0 when the
                                         * list is empty, and also (after ext4_error()) when the block bitmap or
                                         * the buddy information cannot be loaded.
    3767                 :            :  */
    3768                 :            : static noinline_for_stack int
    3769                 :          0 : ext4_mb_discard_group_preallocations(struct super_block *sb,
    3770                 :            :                                         ext4_group_t group, int needed)
    3771                 :            : {
    3772                 :            :         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
    3773                 :            :         struct buffer_head *bitmap_bh = NULL;
    3774                 :            :         struct ext4_prealloc_space *pa, *tmp;
    3775                 :            :         struct list_head list;
    3776                 :            :         struct ext4_buddy e4b;
    3777                 :            :         int err;
    3778                 :            :         int busy = 0;
    3779                 :            :         int free = 0;
    3780                 :            : 
    3781                 :            :         mb_debug(1, "discard preallocation for group %u\n", group);
    3782                 :            : 
                                                /* unlocked peek: nothing to do if the group has no PAs at all */
    3783         [ #  # ]:          0 :         if (list_empty(&grp->bb_prealloc_list))
    3784                 :            :                 return 0;
    3785                 :            : 
    3786                 :          0 :         bitmap_bh = ext4_read_block_bitmap(sb, group);
    3787         [ #  # ]:          0 :         if (bitmap_bh == NULL) {
    3788                 :          0 :                 ext4_error(sb, "Error reading block bitmap for %u", group);
    3789                 :          0 :                 return 0;
    3790                 :            :         }
    3791                 :            : 
    3792                 :          0 :         err = ext4_mb_load_buddy(sb, group, &e4b);
    3793         [ #  # ]:          0 :         if (err) {
    3794                 :          0 :                 ext4_error(sb, "Error loading buddy information for %u", group);
                                                        /* drop the bitmap reference taken above before bailing out */
    3795                 :            :                 put_bh(bitmap_bh);
    3796                 :          0 :                 return 0;
    3797                 :            :         }
    3798                 :            : 
                                                /*
                                                 * needed == 0: use a target of one more than the clusters in a
                                                 * group -- presumably so that "free < needed" below always holds
                                                 * and busy PAs are always retried; confirm.
                                                 */
    3799         [ #  # ]:          0 :         if (needed == 0)
    3800                 :          0 :                 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
    3801                 :            : 
    3802                 :            :         INIT_LIST_HEAD(&list);
    3803                 :            : repeat:
                                                /*
                                                 * Pass 1, under the group lock: move every idle, not yet deleted
                                                 * PA from the group list to the local list.
                                                 */
    3804                 :            :         ext4_lock_group(sb, group);
    3805         [ #  # ]:          0 :         list_for_each_entry_safe(pa, tmp,
    3806                 :            :                                 &grp->bb_prealloc_list, pa_group_list) {
    3807                 :            :                 spin_lock(&pa->pa_lock);
    3808         [ #  # ]:          0 :                 if (atomic_read(&pa->pa_count)) {
                                                                /* PA is referenced right now; remember to retry */
    3809                 :            :                         spin_unlock(&pa->pa_lock);
    3810                 :            :                         busy = 1;
    3811                 :          0 :                         continue;
    3812                 :            :                 }
    3813         [ #  # ]:          0 :                 if (pa->pa_deleted) {
                                                                /* already claimed by another discard path */
    3814                 :            :                         spin_unlock(&pa->pa_lock);
    3815                 :          0 :                         continue;
    3816                 :            :                 }
    3817                 :            : 
    3818                 :            :                 /* seems this one can be freed ... */
    3819                 :          0 :                 pa->pa_deleted = 1;
    3820                 :            : 
    3821                 :            :                 /* we can trust pa_free ... */
    3822                 :          0 :                 free += pa->pa_free;
    3823                 :            : 
    3824                 :            :                 spin_unlock(&pa->pa_lock);
    3825                 :            : 
    3826                 :            :                 list_del(&pa->pa_group_list);
    3827                 :          0 :                 list_add(&pa->u.pa_tmp_list, &list);
    3828                 :            :         }
    3829                 :            : 
    3830                 :            :         /* if we still need more blocks and some PAs were used, try again */
    3831         [ #  # ]:          0 :         if (free < needed && busy) {
    3832                 :            :                 busy = 0;
    3833                 :            :                 ext4_unlock_group(sb, group);
    3834                 :          0 :                 cond_resched();
    3835                 :          0 :                 goto repeat;
    3836                 :            :         }
    3837                 :            : 
    3838                 :            :         /* found anything to free? */
    3839         [ #  # ]:          0 :         if (list_empty(&list)) {
    3840         [ #  # ]:          0 :                 BUG_ON(free != 0);
    3841                 :            :                 goto out;
    3842                 :            :         }
    3843                 :            : 
    3844                 :            :         /* now free all selected PAs */
                                                /* (pass 2 still runs with the group lock held; it is dropped at out:) */
    3845         [ #  # ]:          0 :         list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
    3846                 :            : 
    3847                 :            :                 /* remove from object (inode or locality group) */
    3848                 :          0 :                 spin_lock(pa->pa_obj_lock);
    3849                 :            :                 list_del_rcu(&pa->pa_inode_list);
    3850                 :          0 :                 spin_unlock(pa->pa_obj_lock);
    3851                 :            : 
    3852         [ #  # ]:          0 :                 if (pa->pa_type == MB_GROUP_PA)
    3853                 :          0 :                         ext4_mb_release_group_pa(&e4b, pa);
    3854                 :            :                 else
    3855                 :          0 :                         ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
    3856                 :            : 
                                                        /* unlink from the local list, then hand the pa to ext4_mb_pa_callback via RCU */
    3857                 :            :                 list_del(&pa->u.pa_tmp_list);
    3858                 :          0 :                 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
    3859                 :            :         }
    3860                 :            : 
    3861                 :            : out:
    3862                 :            :         ext4_unlock_group(sb, group);
    3863                 :          0 :         ext4_mb_unload_buddy(&e4b);
    3864                 :            :         put_bh(bitmap_bh);
    3865                 :          0 :         return free;
    3866                 :            : }
    3867                 :            : 
    3868                 :            : /*
    3869                 :            :  * Releases all unused preallocated blocks of the given inode.
    3870                 :            :  *
    3871                 :            :  * It's important to discard preallocations under i_data_sem:
    3872                 :            :  * we don't want another block to be served from the prealloc
    3873                 :            :  * space while we are discarding the inode prealloc space.
    3874                 :            :  *
    3875                 :            :  * FIXME!! Make sure it is valid at all the call sites
                                         *
                                         * Does nothing for inodes that are not regular files. Otherwise it works
                                         * in two phases:
                                         *  1) under ei->i_prealloc_lock, every PA on ei->i_prealloc_list is marked
                                         *     pa_deleted, unlinked from the inode list and moved to a local list.
                                         *     If a PA is still referenced (pa_count != 0) or is already being
                                         *     deleted by someone else, the locks are dropped, the task sleeps for
                                         *     HZ jiffies and the scan restarts;
                                         *  2) for each collected PA the buddy and block bitmap of its group are
                                         *     loaded, the PA is removed from the group list and released under
                                         *     the group lock, and the pa is passed to call_rcu() with
                                         *     ext4_mb_pa_callback.
    3876                 :            :  */
    3877                 :          0 : void ext4_discard_preallocations(struct inode *inode)
    3878                 :            : {
    3879                 :            :         struct ext4_inode_info *ei = EXT4_I(inode);
    3880                 :     919434 :         struct super_block *sb = inode->i_sb;
    3881                 :            :         struct buffer_head *bitmap_bh = NULL;
    3882                 :            :         struct ext4_prealloc_space *pa, *tmp;
    3883                 :            :         ext4_group_t group = 0;
    3884                 :            :         struct list_head list;
    3885                 :            :         struct ext4_buddy e4b;
    3886                 :            :         int err;
    3887                 :            : 
    3888         [ +  + ]:     919434 :         if (!S_ISREG(inode->i_mode)) {
    3889                 :            :                 /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
    3890                 :     186431 :                 return;
    3891                 :            :         }
    3892                 :            : 
    3893                 :            :         mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
    3894                 :            :         trace_ext4_discard_preallocations(inode);
    3895                 :            : 
    3896                 :            :         INIT_LIST_HEAD(&list);
    3897                 :            : 
    3898                 :            : repeat:
    3899                 :            :         /* first, collect all pa's in the inode */
    3900                 :            :         spin_lock(&ei->i_prealloc_lock);
    3901         [ +  + ]:     754912 :         while (!list_empty(&ei->i_prealloc_list)) {
                                                        /* always look at the current head; handled entries are unlinked below */
    3902                 :      21909 :                 pa = list_entry(ei->i_prealloc_list.next,
    3903                 :            :                                 struct ext4_prealloc_space, pa_inode_list);
    3904         [ +  + ]:      21909 :                 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
    3905                 :            :                 spin_lock(&pa->pa_lock);
    3906         [ -  + ]:      21909 :                 if (atomic_read(&pa->pa_count)) {
    3907                 :            :                         /* this shouldn't happen often - nobody should
    3908                 :            :                          * use preallocation while we're discarding it */
    3909                 :            :                         spin_unlock(&pa->pa_lock);
    3910                 :            :                         spin_unlock(&ei->i_prealloc_lock);
    3911                 :          0 :                         ext4_msg(sb, KERN_ERR,
    3912                 :            :                                  "uh-oh! used pa while discarding");
    3913                 :          0 :                         WARN_ON(1);
                                                                /* back off for HZ jiffies, then rescan from the start */
    3914                 :          0 :                         schedule_timeout_uninterruptible(HZ);
    3915                 :          0 :                         goto repeat;
    3916                 :            : 
    3917                 :            :                 }
    3918         [ +  - ]:      21909 :                 if (pa->pa_deleted == 0) {
                                                                /* claim the PA and move it from the inode list to the local list */
    3919                 :      21909 :                         pa->pa_deleted = 1;
    3920                 :            :                         spin_unlock(&pa->pa_lock);
    3921                 :            :                         list_del_rcu(&pa->pa_inode_list);
    3922                 :      21908 :                         list_add(&pa->u.pa_tmp_list, &list);
    3923                 :      21908 :                         continue;
    3924                 :            :                 }
    3925                 :            : 
    3926                 :            :                 /* someone is deleting pa right now */
    3927                 :            :                 spin_unlock(&pa->pa_lock);
    3928                 :            :                 spin_unlock(&ei->i_prealloc_lock);
    3929                 :            : 
    3930                 :            :                 /* we have to wait here because pa_deleted
    3931                 :            :                  * doesn't mean pa is already unlinked from
    3932                 :            :                  * the list. as we might be called from
    3933                 :            :                  * ->clear_inode() the inode will get freed
    3934                 :            :                  * and concurrent thread which is unlinking
    3935                 :            :                  * pa from inode's list may access already
    3936                 :            :                  * freed memory, bad-bad-bad */
    3937                 :            : 
    3938                 :            :                 /* XXX: if this happens too often, we can
    3939                 :            :                  * add a flag to force wait only in case
    3940                 :            :                  * of ->clear_inode(), but not in case of
    3941                 :            :                  * regular truncate */
    3942                 :          0 :                 schedule_timeout_uninterruptible(HZ);
    3943                 :          0 :                 goto repeat;
    3944                 :            :         }
    3945                 :            :         spin_unlock(&ei->i_prealloc_lock);
    3946                 :            : 
                                                /* second, release every collected pa back to its group */
    3947         [ +  + ]:    1674346 :         list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
    3948         [ -  + ]:      21908 :                 BUG_ON(pa->pa_type != MB_INODE_PA);
    3949                 :      21908 :                 group = ext4_get_group_number(sb, pa->pa_pstart);
    3950                 :            : 
    3951                 :      21908 :                 err = ext4_mb_load_buddy(sb, group, &e4b);
    3952         [ -  + ]:      21905 :                 if (err) {
    3953                 :          0 :                         ext4_error(sb, "Error loading buddy information for %u",
    3954                 :            :                                         group);
                                                                /*
                                                                 * NOTE(review): this 'continue' skips list_del() and
                                                                 * call_rcu() for a pa that was already unlinked from
                                                                 * the inode list and marked pa_deleted -- it looks
                                                                 * like the pa is never released on this path; confirm.
                                                                 */
    3955                 :          0 :                         continue;
    3956                 :            :                 }
    3957                 :            : 
    3958                 :      21905 :                 bitmap_bh = ext4_read_block_bitmap(sb, group);
    3959         [ -  + ]:      21907 :                 if (bitmap_bh == NULL) {
    3960                 :          0 :                         ext4_error(sb, "Error reading block bitmap for %u",
    3961                 :            :                                         group);
    3962                 :          0 :                         ext4_mb_unload_buddy(&e4b);
                                                                /* NOTE(review): same concern as the error path above -- the pa is skipped, not released; confirm */
    3963                 :          0 :                         continue;
    3964                 :            :                 }
    3965                 :            : 
                                                        /* group list removal and the release itself happen under the group lock */
    3966                 :            :                 ext4_lock_group(sb, group);
    3967                 :            :                 list_del(&pa->pa_group_list);
    3968                 :      21908 :                 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
    3969                 :            :                 ext4_unlock_group(sb, group);
    3970                 :            : 
    3971                 :      21909 :                 ext4_mb_unload_buddy(&e4b);
    3972                 :            :                 put_bh(bitmap_bh);
    3973                 :            : 
    3974                 :            :                 list_del(&pa->u.pa_tmp_list);
    3975                 :      21909 :                 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
    3976                 :            :         }
    3977                 :            : }
    3978                 :            : 
    3979                 :            : #ifdef CONFIG_EXT4_DEBUG
                                        /*
                                         * Debug-only dump of an allocation context, logged at KERN_ERR (the first
                                         * message reads "Can't allocate", so it is presumably called after a
                                         * failed allocation).
                                         *
                                         * Returns silently unless ext4_mballoc_debug is non-zero and the
                                         * filesystem does not have EXT4_MF_FS_ABORTED set. Otherwise it logs the
                                         * status and flags, the original/goal/best extents
                                         * (group/start/len@logical), the criteria and the scan counters, and then
                                         * walks all groups: for every PA on a group's bb_prealloc_list it prints
                                         * group, start offset within the group and pa_len, and for every group
                                         * with bb_free != 0 it prints bb_free and bb_fragments.
                                         *
                                         * Without CONFIG_EXT4_DEBUG the function is an empty inline stub.
                                         */
    3980                 :            : static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
    3981                 :            : {
    3982                 :            :         struct super_block *sb = ac->ac_sb;
    3983                 :            :         ext4_group_t ngroups, i;
    3984                 :            : 
    3985                 :            :         if (!ext4_mballoc_debug ||
    3986                 :            :             (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
    3987                 :            :                 return;
    3988                 :            : 
    3989                 :            :         ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
    3990                 :            :                         " Allocation context details:");
    3991                 :            :         ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
    3992                 :            :                         ac->ac_status, ac->ac_flags);
                                                /* ac_o_ex = original request, ac_g_ex = goal, ac_b_ex = best found (per the labels in the format string) */
    3993                 :            :         ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
    3994                 :            :                         "goal %lu/%lu/%lu@%lu, "
    3995                 :            :                         "best %lu/%lu/%lu@%lu cr %d",
    3996                 :            :                         (unsigned long)ac->ac_o_ex.fe_group,
    3997                 :            :                         (unsigned long)ac->ac_o_ex.fe_start,
    3998                 :            :                         (unsigned long)ac->ac_o_ex.fe_len,
    3999                 :            :                         (unsigned long)ac->ac_o_ex.fe_logical,
    4000                 :            :                         (unsigned long)ac->ac_g_ex.fe_group,
    4001                 :            :                         (unsigned long)ac->ac_g_ex.fe_start,
    4002                 :            :                         (unsigned long)ac->ac_g_ex.fe_len,
    4003                 :            :                         (unsigned long)ac->ac_g_ex.fe_logical,
    4004                 :            :                         (unsigned long)ac->ac_b_ex.fe_group,
    4005                 :            :                         (unsigned long)ac->ac_b_ex.fe_start,
    4006                 :            :                         (unsigned long)ac->ac_b_ex.fe_len,
    4007                 :            :                         (unsigned long)ac->ac_b_ex.fe_logical,
    4008                 :            :                         (int)ac->ac_criteria);
    4009                 :            :         ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found",
    4010                 :            :                  ac->ac_ex_scanned, ac->ac_found);
    4011                 :            :         ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
    4012                 :            :         ngroups = ext4_get_groups_count(sb);
    4013                 :            :         for (i = 0; i < ngroups; i++) {
    4014                 :            :                 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
    4015                 :            :                 struct ext4_prealloc_space *pa;
    4016                 :            :                 ext4_grpblk_t start;
    4017                 :            :                 struct list_head *cur;
                                                        /* the group lock is held while the group's PA list is walked */
    4018                 :            :                 ext4_lock_group(sb, i);
    4019                 :            :                 list_for_each(cur, &grp->bb_prealloc_list) {
    4020                 :            :                         pa = list_entry(cur, struct ext4_prealloc_space,
    4021                 :            :                                         pa_group_list);
    4022                 :            :                         spin_lock(&pa->pa_lock);
                                                                /* only the in-group offset is wanted; the group number output is NULL */
    4023                 :            :                         ext4_get_group_no_and_offset(sb, pa->pa_pstart,
    4024                 :            :                                                      NULL, &start);
    4025                 :            :                         spin_unlock(&pa->pa_lock);
                                                                /* NOTE(review): pa_len is read after pa_lock was dropped (group lock still held); confirm that is intended */
    4026                 :            :                         printk(KERN_ERR "PA:%u:%d:%u \n", i,
    4027                 :            :                                start, pa->pa_len);
    4028                 :            :                 }
    4029                 :            :                 ext4_unlock_group(sb, i);
    4030                 :            : 
                                                        /* groups without free space get no summary line */
    4031                 :            :                 if (grp->bb_free == 0)
    4032                 :            :                         continue;
    4033                 :            :                 printk(KERN_ERR "%u: %d/%d \n",
    4034                 :            :                        i, grp->bb_free, grp->bb_fragments);
    4035                 :            :         }
    4036                 :            :         printk(KERN_ERR "\n");
    4037                 :            : }
    4038                 :            : #else
    4039                 :            : static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
    4040                 :            : {
    4041                 :            :         return;
    4042                 :            : }
    4043                 :            : #endif
    4044                 :            : 
    4045                 :            : /*
    4046                 :            :  * We use locality group preallocation for small files. The size of the
    4047                 :            :  * file is determined by the current size or the resulting size after
    4048                 :            :  * allocation, whichever is larger.
    4049                 :            :  *
    4050                 :            :  * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
                                         *
                                         * Decision made below, expressed through ac->ac_flags:
                                         * - requests without EXT4_MB_HINT_DATA, or with EXT4_MB_HINT_GOAL_ONLY,
                                         *   are left untouched;
                                         * - EXT4_MB_HINT_NOPREALLOC is set when the request ends exactly at the
                                         *   block-rounded i_size, the filesystem is not busy and the inode's
                                         *   i_writecount is 0;
                                         * - EXT4_MB_STREAM_ALLOC is set when s_mb_group_prealloc <= 0 or the size
                                         *   exceeds s_mb_stream_request;
                                         * - otherwise EXT4_MB_HINT_GROUP_ALLOC is set, ac->ac_lg points at the
                                         *   current CPU's locality group and that group's lg_mutex is taken.
                                         *   The mutex is still held when this function returns; it is not
                                         *   released here.
    4051                 :            :  */
    4052                 :          0 : static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
    4053                 :            : {
    4054                 :     217518 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    4055                 :     217518 :         int bsbits = ac->ac_sb->s_blocksize_bits;
    4056                 :            :         loff_t size, isize;
    4057                 :            : 
    4058         [ +  + ]:     217518 :         if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
    4059                 :            :                 return;
    4060                 :            : 
    4061         [ +  - ]:     167575 :         if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
    4062                 :            :                 return;
    4063                 :            : 
                                                /* size: logical block just past the end of the original request */
    4064                 :     167575 :         size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
                                                /* isize: current i_size, rounded up to whole blocks */
    4065                 :     167577 :         isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
    4066                 :            :                 >> bsbits;
    4067                 :            : 
                                                /* request ends at EOF, fs is not busy, inode has no writers: skip preallocation */
    4068 [ +  + ][ +  - ]:     385089 :         if ((size == isize) &&
    4069         [ +  + ]:      23247 :             !ext4_fs_is_busy(sbi) &&
    4070                 :      23247 :             (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
    4071                 :      13551 :                 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
    4072                 :      13551 :                 return;
    4073                 :            :         }
    4074                 :            : 
                                                /* a non-positive s_mb_group_prealloc forces stream allocation */
    4075         [ -  + ]:     154020 :         if (sbi->s_mb_group_prealloc <= 0) {
    4076                 :          0 :                 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
    4077                 :          0 :                 return;
    4078                 :            :         }
    4079                 :            : 
    4080                 :            :         /* don't use group allocation for large files */
    4081                 :     154020 :         size = max(size, isize);
    4082         [ +  + ]:     154020 :         if (size > sbi->s_mb_stream_request) {
    4083                 :     149790 :                 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
    4084                 :     149790 :                 return;
    4085                 :            :         }
    4086                 :            : 
    4087         [ -  + ]:       4230 :         BUG_ON(ac->ac_lg != NULL);
    4088                 :            :         /*
    4089                 :            :          * locality group prealloc space are per cpu. The reason for having
    4090                 :            :          * per cpu locality group is to reduce the contention between block
    4091                 :            :          * request from multiple CPUs.
    4092                 :            :          */
    4093                 :       8460 :         ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
    4094                 :            : 
    4095                 :            :         /* we're going to use group allocation */
    4096                 :       4230 :         ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
    4097                 :            : 
    4098                 :            :         /* serialize all allocations in the group */
    4099                 :       4230 :         mutex_lock(&ac->ac_lg->lg_mutex);
    4100                 :            : }
    4101                 :            : 
                                        /*
                                         * Fill in the allocation context @ac from the allocation request @ar.
                                         *
                                         * - the request length is capped at EXT4_CLUSTERS_PER_GROUP(sb);
                                         * - a goal outside [s_first_data_block, ext4_blocks_count(es)) is replaced
                                         *   by s_first_data_block, and the goal is then split into group number
                                         *   and in-group offset;
                                         * - ac_o_ex (original request) is set from those values, ac_g_ex starts
                                         *   out as a copy of it, and ac_flags is taken from ar->flags;
                                         * - ext4_mb_group_or_file() then adjusts ac_flags and may set ac->ac_lg
                                         *   and take that locality group's lg_mutex.
                                         *
                                         * Always returns 0.
                                         */
    4102                 :            : static noinline_for_stack int
    4103                 :          0 : ext4_mb_initialize_context(struct ext4_allocation_context *ac,
    4104                 :            :                                 struct ext4_allocation_request *ar)
    4105                 :            : {
    4106                 :     217530 :         struct super_block *sb = ar->inode->i_sb;
    4107                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    4108                 :     435058 :         struct ext4_super_block *es = sbi->s_es;
    4109                 :            :         ext4_group_t group;
    4110                 :            :         unsigned int len;
    4111                 :            :         ext4_fsblk_t goal;
    4112                 :            :         ext4_grpblk_t block;
    4113                 :            : 
    4114                 :            :         /* we can't allocate > group size */
    4115                 :     217530 :         len = ar->len;
    4116                 :            : 
    4117                 :            :         /* just a dirty hack to filter too big requests  */
    4118         [ -  + ]:     217530 :         if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
    4119                 :            :                 len = EXT4_CLUSTERS_PER_GROUP(sb);
    4120                 :            : 
    4121                 :            :         /* start searching from the goal */
    4122                 :     217530 :         goal = ar->goal;
                                                /* an out-of-range goal falls back to the first data block */
    4123    [ +  + ][ + ]:     217530 :         if (goal < le32_to_cpu(es->s_first_data_block) ||
    4124                 :            :                         goal >= ext4_blocks_count(es))
    4125                 :            :                 goal = le32_to_cpu(es->s_first_data_block);
    4126                 :     217530 :         ext4_get_group_no_and_offset(sb, goal, &group, &block);
    4127                 :            : 
    4128                 :            :         /* set up allocation goals */
    4129                 :     217532 :         ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
    4130                 :     217532 :         ac->ac_status = AC_STATUS_CONTINUE;
    4131                 :     217532 :         ac->ac_sb = sb;
    4132                 :     217532 :         ac->ac_inode = ar->inode;
    4133                 :     217532 :         ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
    4134                 :     217532 :         ac->ac_o_ex.fe_group = group;
    4135                 :     217532 :         ac->ac_o_ex.fe_start = block;
    4136                 :     217532 :         ac->ac_o_ex.fe_len = len;
                                                /* the goal extent starts out identical to the original request */
    4137                 :     217532 :         ac->ac_g_ex = ac->ac_o_ex;
    4138                 :     217532 :         ac->ac_flags = ar->flags;
    4139                 :            : 
    4140                 :            :         /* we have to define context: whether we'll work with a file or
    4141                 :            :          * a locality group. this is a policy, actually */
    4142                 :     217532 :         ext4_mb_group_or_file(ac);
    4143                 :            : 
                                                /* NOTE(review): ac_2order is printed here but not assigned in this function; presumably set by the caller -- confirm */
    4144                 :            :         mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
    4145                 :            :                         "left: %u/%u, right %u/%u to %swritable\n",
    4146                 :            :                         (unsigned) ar->len, (unsigned) ar->logical,
    4147                 :            :                         (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
    4148                 :            :                         (unsigned) ar->lleft, (unsigned) ar->pleft,
    4149                 :            :                         (unsigned) ar->lright, (unsigned) ar->pright,
    4150                 :            :                         atomic_read(&ar->inode->i_writecount) ? "" : "non-");
    4151                 :     217523 :         return 0;
    4152                 :            : 
    4153                 :            : }
    4154                 :            : 
    4155                 :            : static noinline_for_stack void
    4156                 :          0 : ext4_mb_discard_lg_preallocations(struct super_block *sb,
    4157                 :            :                                         struct ext4_locality_group *lg,
    4158                 :            :                                         int order, int total_entries)
    4159                 :            : {
    4160                 :            :         ext4_group_t group = 0;
    4161                 :            :         struct ext4_buddy e4b;
    4162                 :            :         struct list_head discard_list;
    4163                 :            :         struct ext4_prealloc_space *pa, *tmp;
    4164                 :            : 
    4165                 :            :         mb_debug(1, "discard locality group preallocation\n");
    4166                 :            : 
    4167                 :            :         INIT_LIST_HEAD(&discard_list);
    4168                 :            : 
    4169                 :            :         spin_lock(&lg->lg_prealloc_lock);
    4170         [ #  # ]:          0 :         list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
    4171                 :            :                                                 pa_inode_list) {
    4172                 :            :                 spin_lock(&pa->pa_lock);
    4173         [ #  # ]:          0 :                 if (atomic_read(&pa->pa_count)) {
    4174                 :            :                         /*
    4175                 :            :                          * This is the pa that we just used
    4176                 :            :                          * for block allocation. So don't
    4177                 :            :                          * free that
    4178                 :            :                          */
    4179                 :            :                         spin_unlock(&pa->pa_lock);
    4180                 :          0 :                         continue;
    4181                 :            :                 }
    4182         [ #  # ]:          0 :                 if (pa->pa_deleted) {
    4183                 :            :                         spin_unlock(&pa->pa_lock);
    4184                 :          0 :                         continue;
    4185                 :            :                 }
    4186                 :            :                 /* only lg prealloc space */
    4187         [ #  # ]:          0 :                 BUG_ON(pa->pa_type != MB_GROUP_PA);
    4188                 :            : 
    4189                 :            :                 /* seems this one can be freed ... */
    4190                 :          0 :                 pa->pa_deleted = 1;
    4191                 :            :                 spin_unlock(&pa->pa_lock);
    4192                 :            : 
    4193                 :            :                 list_del_rcu(&pa->pa_inode_list);
    4194                 :          0 :                 list_add(&pa->u.pa_tmp_list, &discard_list);
    4195                 :            : 
    4196                 :          0 :                 total_entries--;
    4197         [ #  # ]:          0 :                 if (total_entries <= 5) {
    4198                 :            :                         /*
    4199                 :            :                          * we want to keep only 5 entries
    4200                 :            :                          * allowing it to grow to 8. This
    4201                 :            :                          * mak sure we don't call discard
    4202                 :            :                          * soon for this list.
    4203                 :            :                          */
    4204                 :            :                         break;
    4205                 :            :                 }
    4206                 :            :         }
    4207                 :            :         spin_unlock(&lg->lg_prealloc_lock);
    4208                 :            : 
    4209         [ #  # ]:          0 :         list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
    4210                 :            : 
    4211                 :          0 :                 group = ext4_get_group_number(sb, pa->pa_pstart);
    4212         [ #  # ]:          0 :                 if (ext4_mb_load_buddy(sb, group, &e4b)) {
    4213                 :          0 :                         ext4_error(sb, "Error loading buddy information for %u",
    4214                 :            :                                         group);
    4215                 :          0 :                         continue;
    4216                 :            :                 }
    4217                 :            :                 ext4_lock_group(sb, group);
    4218                 :            :                 list_del(&pa->pa_group_list);
    4219                 :          0 :                 ext4_mb_release_group_pa(&e4b, pa);
    4220                 :            :                 ext4_unlock_group(sb, group);
    4221                 :            : 
    4222                 :          0 :                 ext4_mb_unload_buddy(&e4b);
    4223                 :            :                 list_del(&pa->u.pa_tmp_list);
    4224                 :          0 :                 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
    4225                 :            :         }
    4226                 :          0 : }
    4227                 :            : 
    4228                 :            : /*
    4229                 :            :  * We have incremented pa_count. So it cannot be freed at this
    4230                 :            :  * point. Also we hold lg_mutex. So no parallel allocation is
    4231                 :            :  * possible from this lg. That means pa_free cannot be updated.
    4232                 :            :  *
    4233                 :            :  * A parallel ext4_mb_discard_group_preallocations is possible.
    4234                 :            :  * which can cause the lg_prealloc_list to be updated.
    4235                 :            :  */
    4236                 :            : 
    4237                 :          0 : static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
    4238                 :            : {
    4239                 :            :         int order, added = 0, lg_prealloc_count = 1;
    4240                 :       4210 :         struct super_block *sb = ac->ac_sb;
    4241                 :       4210 :         struct ext4_locality_group *lg = ac->ac_lg;
    4242                 :       4210 :         struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
    4243                 :            : 
    4244                 :       4210 :         order = fls(pa->pa_free) - 1;
    4245         [ #  # ]:          0 :         if (order > PREALLOC_TB_SIZE - 1)
    4246                 :            :                 /* The max size of hash table is PREALLOC_TB_SIZE */
    4247                 :            :                 order = PREALLOC_TB_SIZE - 1;
    4248                 :            :         /* Add the prealloc space to lg */
    4249                 :            :         spin_lock(&lg->lg_prealloc_lock);
    4250         [ +  + ]:       4221 :         list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
    4251                 :            :                                                 pa_inode_list) {
    4252                 :            :                 spin_lock(&tmp_pa->pa_lock);
    4253         [ -  + ]:         11 :                 if (tmp_pa->pa_deleted) {
    4254                 :            :                         spin_unlock(&tmp_pa->pa_lock);
    4255                 :          0 :                         continue;
    4256                 :            :                 }
    4257 [ +  - ][ +  + ]:         11 :                 if (!added && pa->pa_free < tmp_pa->pa_free) {
    4258                 :            :                         /* Add to the tail of the previous entry */
    4259                 :          1 :                         list_add_tail_rcu(&pa->pa_inode_list,
    4260                 :            :                                                 &tmp_pa->pa_inode_list);
    4261                 :            :                         added = 1;
    4262                 :            :                         /*
    4263                 :            :                          * we want to count the total
    4264                 :            :                          * number of entries in the list
    4265                 :            :                          */
    4266                 :            :                 }
    4267                 :            :                 spin_unlock(&tmp_pa->pa_lock);
    4268                 :         11 :                 lg_prealloc_count++;
    4269                 :            :         }
    4270         [ +  + ]:       4210 :         if (!added)
    4271                 :       4209 :                 list_add_tail_rcu(&pa->pa_inode_list,
    4272                 :            :                                         &lg->lg_prealloc_list[order]);
    4273                 :            :         spin_unlock(&lg->lg_prealloc_lock);
    4274                 :            : 
    4275                 :            :         /* Now trim the list to be not more than 8 elements */
    4276         [ -  + ]:       4210 :         if (lg_prealloc_count > 8) {
    4277                 :          0 :                 ext4_mb_discard_lg_preallocations(sb, lg,
    4278                 :            :                                                   order, lg_prealloc_count);
    4279                 :          0 :                 return;
    4280                 :            :         }
    4281                 :            :         return ;
    4282                 :            : }
    4283                 :            : 
    4284                 :            : /*
    4285                 :            :  * release all resource we used in allocation
    4286                 :            :  */
    4287                 :          0 : static int ext4_mb_release_context(struct ext4_allocation_context *ac)
    4288                 :            : {
    4289                 :     217541 :         struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
    4290                 :     217541 :         struct ext4_prealloc_space *pa = ac->ac_pa;
    4291         [ +  + ]:     217541 :         if (pa) {
    4292         [ +  + ]:     151040 :                 if (pa->pa_type == MB_GROUP_PA) {
    4293                 :            :                         /* see comment in ext4_mb_use_group_pa() */
    4294                 :            :                         spin_lock(&pa->pa_lock);
    4295                 :       4230 :                         pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
    4296                 :       4230 :                         pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
    4297                 :       4230 :                         pa->pa_free -= ac->ac_b_ex.fe_len;
    4298                 :       4230 :                         pa->pa_len -= ac->ac_b_ex.fe_len;
    4299                 :            :                         spin_unlock(&pa->pa_lock);
    4300                 :            :                 }
    4301                 :            :         }
    4302         [ +  + ]:     435071 :         if (pa) {
    4303                 :            :                 /*
    4304                 :            :                  * We want to add the pa to the right bucket.
    4305                 :            :                  * Remove it from the list and while adding
    4306                 :            :                  * make sure the list to which we are adding
    4307                 :            :                  * doesn't grow big.
    4308                 :            :                  */
    4309 [ +  + ][ +  + ]:     151040 :                 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
    4310                 :       4210 :                         spin_lock(pa->pa_obj_lock);
    4311                 :            :                         list_del_rcu(&pa->pa_inode_list);
    4312                 :       4210 :                         spin_unlock(pa->pa_obj_lock);
    4313                 :       4210 :                         ext4_mb_add_n_trim(ac);
    4314                 :            :                 }
    4315                 :     151040 :                 ext4_mb_put_pa(ac, ac->ac_sb, pa);
    4316                 :            :         }
    4317         [ +  + ]:     435074 :         if (ac->ac_bitmap_page)
    4318                 :      88750 :                 page_cache_release(ac->ac_bitmap_page);
    4319         [ +  + ]:     217536 :         if (ac->ac_buddy_page)
    4320                 :      88753 :                 page_cache_release(ac->ac_buddy_page);
    4321         [ +  + ]:     217535 :         if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
    4322                 :       4230 :                 mutex_unlock(&ac->ac_lg->lg_mutex);
    4323                 :     217535 :         ext4_mb_collect_stats(ac);
    4324                 :     217533 :         return 0;
    4325                 :            : }
    4326                 :            : 
    4327                 :          0 : static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
    4328                 :            : {
    4329                 :            :         ext4_group_t i, ngroups = ext4_get_groups_count(sb);
    4330                 :            :         int ret;
    4331                 :            :         int freed = 0;
    4332                 :            : 
    4333                 :            :         trace_ext4_mb_discard_preallocations(sb, needed);
    4334         [ #  # ]:          0 :         for (i = 0; i < ngroups && needed > 0; i++) {
    4335                 :          0 :                 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
    4336                 :          0 :                 freed += ret;
    4337                 :          0 :                 needed -= ret;
    4338                 :            :         }
    4339                 :            : 
    4340                 :          0 :         return freed;
    4341                 :            : }
    4342                 :            : 
    4343                 :            : /*
    4344                 :            :  * Main entry point into mballoc to allocate blocks
    4345                 :            :  * it tries to use preallocation first, then falls back
    4346                 :            :  * to usual allocation
    4347                 :            :  */
    4348                 :          0 : ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
    4349                 :            :                                 struct ext4_allocation_request *ar, int *errp)
    4350                 :            : {
    4351                 :            :         int freed;
    4352                 :          0 :         struct ext4_allocation_context *ac = NULL;
    4353                 :            :         struct ext4_sb_info *sbi;
    4354                 :     217504 :         struct super_block *sb;
    4355                 :            :         ext4_fsblk_t block = 0;
    4356                 :            :         unsigned int inquota = 0;
    4357                 :            :         unsigned int reserv_clstrs = 0;
    4358                 :            : 
    4359                 :            :         might_sleep();
    4360                 :     217504 :         sb = ar->inode->i_sb;
    4361                 :            :         sbi = EXT4_SB(sb);
    4362                 :            : 
    4363                 :            :         trace_ext4_request_blocks(ar);
    4364                 :            : 
    4365                 :            :         /* Allow to use superuser reservation for quota file */
    4366         [ -  + ]:     435031 :         if (IS_NOQUOTA(ar->inode))
    4367                 :          0 :                 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
    4368                 :            : 
    4369                 :            :         /*
    4370                 :            :          * For delayed allocation, we could skip the ENOSPC and
    4371                 :            :          * EDQUOT check, as blocks and quotas have been already
    4372                 :            :          * reserved when data being copied into pagecache.
    4373                 :            :          */
    4374         [ +  + ]:     217527 :         if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
    4375                 :      98141 :                 ar->flags |= EXT4_MB_DELALLOC_RESERVED;
    4376                 :            :         else {
    4377                 :            :                 /* Without delayed allocation we need to verify
    4378                 :            :                  * there is enough free blocks to do block allocation
    4379                 :            :                  * and verify allocation doesn't exceed the quota limits.
    4380                 :            :                  */
    4381   [ +  -  -  + ]:     238772 :                 while (ar->len &&
    4382                 :     119386 :                         ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
    4383                 :            : 
    4384                 :            :                         /* let others to free the space */
    4385                 :          0 :                         cond_resched();
    4386                 :          0 :                         ar->len = ar->len >> 1;
    4387                 :            :                 }
    4388         [ -  + ]:     119386 :                 if (!ar->len) {
    4389                 :          0 :                         *errp = -ENOSPC;
    4390                 :          0 :                         return 0;
    4391                 :            :                 }
    4392                 :            :                 reserv_clstrs = ar->len;
    4393         [ +  - ]:     119386 :                 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
    4394                 :          0 :                         dquot_alloc_block_nofail(ar->inode,
    4395                 :          0 :                                                  EXT4_C2B(sbi, ar->len));
    4396                 :            :                 } else {
    4397 [ +  - ][ -  + ]:     238772 :                         while (ar->len &&
    4398                 :     238772 :                                 dquot_alloc_block(ar->inode,
    4399                 :     119386 :                                                   EXT4_C2B(sbi, ar->len))) {
    4400                 :            : 
    4401                 :          0 :                                 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
    4402                 :          0 :                                 ar->len--;
    4403                 :            :                         }
    4404                 :            :                 }
    4405                 :     119386 :                 inquota = ar->len;
    4406         [ -  + ]:     119386 :                 if (ar->len == 0) {
    4407                 :          0 :                         *errp = -EDQUOT;
    4408                 :          0 :                         goto out;
    4409                 :            :                 }
    4410                 :            :         }
    4411                 :            : 
    4412                 :     217527 :         ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
    4413         [ -  + ]:     217520 :         if (!ac) {
    4414                 :          0 :                 ar->len = 0;
    4415                 :          0 :                 *errp = -ENOMEM;
    4416                 :          0 :                 goto out;
    4417                 :            :         }
    4418                 :            : 
    4419                 :     217520 :         *errp = ext4_mb_initialize_context(ac, ar);
    4420         [ -  + ]:     217507 :         if (*errp) {
    4421                 :          0 :                 ar->len = 0;
    4422                 :          0 :                 goto out;
    4423                 :            :         }
    4424                 :            : 
    4425                 :     217507 :         ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
    4426         [ +  + ]:     217507 :         if (!ext4_mb_use_preallocated(ac)) {
    4427                 :      88752 :                 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
    4428                 :      88752 :                 ext4_mb_normalize_request(ac, ar);
    4429                 :            : repeat:
    4430                 :            :                 /* allocate space in core */
    4431                 :      88754 :                 *errp = ext4_mb_regular_allocator(ac);
    4432         [ +  + ]:      88753 :                 if (*errp)
    4433                 :            :                         goto discard_and_exit;
    4434                 :            : 
    4435                 :            :                 /* as we've just preallocated more space than
    4436                 :            :                  * user requested originally, we store allocated
    4437                 :            :                  * space in a special descriptor */
    4438 [ +  - ][ +  + ]:      88752 :                 if (ac->ac_status == AC_STATUS_FOUND &&
    4439                 :      88752 :                     ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
    4440                 :      22256 :                         *errp = ext4_mb_new_preallocation(ac);
    4441            [ + ]:      88752 :                 if (*errp) {
    4442                 :            :                 discard_and_exit:
    4443                 :            :                         ext4_discard_allocated_blocks(ac);
    4444                 :            :                         goto errout;
    4445                 :            :                 }
    4446                 :            :         }
    4447         [ +  - ]:     217527 :         if (likely(ac->ac_status == AC_STATUS_FOUND)) {
    4448                 :     217527 :                 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
    4449         [ -  + ]:     217540 :                 if (*errp == -EAGAIN) {
    4450                 :            :                         /*
    4451                 :            :                          * drop the reference that we took
    4452                 :            :                          * in ext4_mb_use_best_found
    4453                 :            :                          */
    4454                 :          0 :                         ext4_mb_release_context(ac);
    4455                 :          0 :                         ac->ac_b_ex.fe_group = 0;
    4456                 :          0 :                         ac->ac_b_ex.fe_start = 0;
    4457                 :          0 :                         ac->ac_b_ex.fe_len = 0;
    4458                 :          0 :                         ac->ac_status = AC_STATUS_CONTINUE;
    4459                 :          0 :                         goto repeat;
    4460         [ -  + ]:     217540 :                 } else if (*errp) {
    4461                 :            :                         ext4_discard_allocated_blocks(ac);
    4462                 :            :                         goto errout;
    4463                 :            :                 } else {
    4464                 :     217540 :                         block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
    4465                 :     217540 :                         ar->len = ac->ac_b_ex.fe_len;
    4466                 :            :                 }
    4467                 :            :         } else {
    4468                 :          0 :                 freed  = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
    4469         [ #  # ]:          0 :                 if (freed)
    4470                 :            :                         goto repeat;
    4471                 :          0 :                 *errp = -ENOSPC;
    4472                 :            :         }
    4473                 :            : 
    4474                 :            : errout:
    4475         [ -  + ]:     217540 :         if (*errp) {
    4476                 :          0 :                 ac->ac_b_ex.fe_len = 0;
    4477                 :          0 :                 ar->len = 0;
    4478                 :            :                 ext4_mb_show_ac(ac);
    4479                 :            :         }
    4480                 :     217540 :         ext4_mb_release_context(ac);
    4481                 :            : out:
    4482            [ + ]:     217504 :         if (ac)
    4483                 :     217509 :                 kmem_cache_free(ext4_ac_cachep, ac);
    4484 [ +  + ][ -  + ]:     217534 :         if (inquota && ar->len < inquota)
    4485                 :          0 :                 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
    4486         [ -  + ]:     217534 :         if (!ar->len) {
    4487         [ #  # ]:          0 :                 if (!ext4_test_inode_state(ar->inode,
    4488                 :            :                                            EXT4_STATE_DELALLOC_RESERVED))
    4489                 :            :                         /* release all the reserved blocks if non delalloc */
    4490                 :          0 :                         percpu_counter_sub(&sbi->s_dirtyclusters_counter,
    4491                 :            :                                                 reserv_clstrs);
    4492                 :            :         }
    4493                 :            : 
    4494                 :            :         trace_ext4_allocate_blocks(ar, (unsigned long long)block);
    4495                 :            : 
    4496                 :     217534 :         return block;
    4497                 :            : }
    4498                 :            : 
    4499                 :            : /*
    4500                 :            :  * We can merge two free data extents only if the physical blocks
    4501                 :            :  * are contiguous, AND the extents were freed by the same transaction,
    4502                 :            :  * AND the blocks are associated with the same group.
    4503                 :            :  */
    4504                 :            : static int can_merge(struct ext4_free_data *entry1,
    4505                 :            :                         struct ext4_free_data *entry2)
    4506                 :            : {
    4507 [ +  + ][ +  - ]:     203981 :         if ((entry1->efd_tid == entry2->efd_tid) &&
         [ +  + ][ +  - ]
    4508 [ +  + ][ +  + ]:     203981 :             (entry1->efd_group == entry2->efd_group) &&
    4509                 :     202791 :             ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
    4510                 :            :                 return 1;
    4511                 :            :         return 0;
    4512                 :            : }
    4513                 :            : 
    4514                 :            : static noinline_for_stack int
    4515                 :          0 : ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
    4516                 :            :                       struct ext4_free_data *new_entry)
    4517                 :            : {
    4518                 :     127596 :         ext4_group_t group = e4b->bd_group;
    4519                 :            :         ext4_grpblk_t cluster;
    4520                 :            :         struct ext4_free_data *entry;
    4521                 :     127596 :         struct ext4_group_info *db = e4b->bd_info;
    4522                 :     127596 :         struct super_block *sb = e4b->bd_sb;
    4523                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    4524                 :     127596 :         struct rb_node **n = &db->bb_free_root.rb_node, *node;
    4525                 :            :         struct rb_node *parent = NULL, *new_node;
    4526                 :            : 
    4527         [ -  + ]:     127596 :         BUG_ON(!ext4_handle_valid(handle));
    4528         [ -  + ]:     127596 :         BUG_ON(e4b->bd_bitmap_page == NULL);
    4529         [ -  + ]:     127596 :         BUG_ON(e4b->bd_buddy_page == NULL);
    4530                 :            : 
    4531                 :     127596 :         new_node = &new_entry->efd_node;
    4532                 :     127596 :         cluster = new_entry->efd_start_cluster;
    4533                 :            : 
    4534         [ +  + ]:     127596 :         if (!*n) {
    4535                 :            :                 /* first free block exent. We need to
    4536                 :            :                    protect buddy cache from being freed,
    4537                 :            :                  * otherwise we'll refresh it from
    4538                 :            :                  * on-disk bitmap and lose not-yet-available
    4539                 :            :                  * blocks */
    4540                 :            :                 page_cache_get(e4b->bd_buddy_page);
    4541                 :       3477 :                 page_cache_get(e4b->bd_bitmap_page);
    4542                 :            :         }
    4543         [ +  + ]:     752613 :         while (*n) {
    4544                 :            :                 parent = *n;
    4545                 :            :                 entry = rb_entry(parent, struct ext4_free_data, efd_node);
    4546         [ +  + ]:     625017 :                 if (cluster < entry->efd_start_cluster)
    4547                 :     342887 :                         n = &(*n)->rb_left;
    4548         [ +  - ]:     282130 :                 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
    4549                 :     282130 :                         n = &(*n)->rb_right;
    4550                 :            :                 else {
    4551                 :          0 :                         ext4_grp_locked_error(sb, group, 0,
    4552                 :            :                                 ext4_group_first_block_no(sb, group) +
    4553                 :            :                                 EXT4_C2B(sbi, cluster),
    4554                 :            :                                 "Block already on to-be-freed list");
    4555                 :     625017 :                         return 0;
    4556                 :            :                 }
    4557                 :            :         }
    4558                 :            : 
    4559                 :            :         rb_link_node(new_node, parent, n);
    4560                 :     127596 :         rb_insert_color(new_node, &db->bb_free_root);
    4561                 :            : 
    4562                 :            :         /* Now try to see the extent can be merged to left and right */
    4563                 :     127595 :         node = rb_prev(new_node);
    4564         [ +  + ]:     127590 :         if (node) {
    4565                 :     103041 :                 entry = rb_entry(node, struct ext4_free_data, efd_node);
    4566   [ +  +  +  - ]:     149581 :                 if (can_merge(entry, new_entry) &&
    4567                 :            :                     ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
    4568                 :      46540 :                         new_entry->efd_start_cluster = entry->efd_start_cluster;
    4569                 :      46540 :                         new_entry->efd_count += entry->efd_count;
    4570                 :      46540 :                         rb_erase(node, &(db->bb_free_root));
    4571                 :      46540 :                         kmem_cache_free(ext4_free_data_cachep, entry);
    4572                 :            :                 }
    4573                 :            :         }
    4574                 :            : 
    4575                 :     127590 :         node = rb_next(new_node);
    4576         [ +  + ]:     127595 :         if (node) {
    4577                 :     100940 :                 entry = rb_entry(node, struct ext4_free_data, efd_node);
    4578   [ +  +  +  - ]:     111406 :                 if (can_merge(new_entry, entry) &&
    4579                 :            :                     ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
    4580                 :      10466 :                         new_entry->efd_count += entry->efd_count;
    4581                 :      10466 :                         rb_erase(node, &(db->bb_free_root));
    4582                 :      10466 :                         kmem_cache_free(ext4_free_data_cachep, entry);
    4583                 :            :                 }
    4584                 :            :         }
    4585                 :            :         /* Add the extent to transaction's private list */
    4586                 :            :         ext4_journal_callback_add(handle, ext4_free_data_callback,
    4587                 :            :                                   &new_entry->efd_jce);
    4588                 :     127596 :         return 0;
    4589                 :            : }
    4590                 :            : 
    4591                 :            : /**
    4592                 :            :  * ext4_free_blocks() -- Free given blocks and update quota
    4593                 :            :  * @handle:             handle for this transaction
    4594                 :            :  * @inode:              inode whose quota and reservations are updated
    4595                 :            :  * @block:              start physical block to free (taken from @bh if 0)
    4596                 :            :  * @count:              number of blocks to free
    4597                 :            :  * @flags:              EXT4_FREE_BLOCKS_* flags controlling the free
    4598                 :            :  */
    4599                 :          0 : void ext4_free_blocks(handle_t *handle, struct inode *inode,
    4600                 :            :                       struct buffer_head *bh, ext4_fsblk_t block,
    4601                 :            :                       unsigned long count, int flags)
    4602                 :            : {
    4603                 :            :         struct buffer_head *bitmap_bh = NULL;
    4604                 :     510359 :         struct super_block *sb = inode->i_sb;
    4605                 :            :         struct ext4_group_desc *gdp;
    4606                 :            :         unsigned int overflow;
    4607                 :            :         ext4_grpblk_t bit;
    4608                 :            :         struct buffer_head *gd_bh;
    4609                 :            :         ext4_group_t block_group;
    4610                 :            :         struct ext4_sb_info *sbi;
    4611                 :            :         struct ext4_inode_info *ei = EXT4_I(inode);
    4612                 :            :         struct ext4_buddy e4b;
    4613                 :            :         unsigned int count_clusters;
    4614                 :            :         int err = 0;
    4615                 :            :         int ret;
    4616                 :            : 
    4617                 :            :         might_sleep();
    4618         [ -  + ]:     127590 :         if (bh) {
    4619         [ #  # ]:          0 :                 if (block)
    4620         [ #  # ]:          0 :                         BUG_ON(block != bh->b_blocknr);
    4621                 :            :                 else
    4622                 :          0 :                         block = bh->b_blocknr;
    4623                 :            :         }
    4624                 :            : 
    4625                 :            :         sbi = EXT4_SB(sb);
    4626   [ +  +  -  + ]:     255177 :         if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
    4627                 :     127589 :             !ext4_data_block_valid(sbi, block, count)) {
    4628                 :          0 :                 ext4_error(sb, "Freeing blocks not in datazone - "
    4629                 :            :                            "block = %llu, count = %lu", block, count);
    4630                 :          0 :                 goto error_return;
    4631                 :            :         }
    4632                 :            : 
    4633                 :            :         ext4_debug("freeing block %llu\n", block);
    4634                 :            :         trace_ext4_free_blocks(inode, block, count, flags);
    4635                 :            : 
    4636         [ +  + ]:     255179 :         if (flags & EXT4_FREE_BLOCKS_FORGET) {
    4637                 :            :                 struct buffer_head *tbh = bh;
    4638                 :            :                 int i;
    4639                 :            : 
    4640         [ +  - ]:      49314 :                 BUG_ON(bh && (count > 1));
    4641                 :            : 
    4642         [ +  + ]:      98654 :                 for (i = 0; i < count; i++) {
    4643                 :      49340 :                         cond_resched();
    4644         [ +  - ]:      49340 :                         if (!bh)
    4645                 :      49340 :                                 tbh = sb_find_get_block(inode->i_sb,
    4646                 :            :                                                         block + i);
    4647         [ +  + ]:      49339 :                         if (!tbh)
    4648                 :       6570 :                                 continue;
    4649                 :      42769 :                         ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
    4650                 :            :                                     inode, tbh, block + i);
    4651                 :            :                 }
    4652                 :            :         }
    4653                 :            : 
    4654                 :            :         /*
    4655                 :            :          * We need to make sure we don't reuse the freed block until
    4656                 :            :          * after the transaction is committed, which we can do by
    4657                 :            :          * treating the block as metadata, below.  We make an
    4658                 :            :          * exception if the inode is to be written in writeback mode
    4659                 :            :          * since writeback mode has weak data consistency guarantees.
    4660                 :            :          */
    4661            [ + ]:     127589 :         if (!ext4_should_writeback_data(inode))
    4662                 :     127590 :                 flags |= EXT4_FREE_BLOCKS_METADATA;
    4663                 :            : 
    4664                 :            :         /*
    4665                 :            :          * If the extent to be freed does not begin on a cluster
    4666                 :            :          * boundary, we need to deal with partial clusters at the
    4667                 :            :          * beginning and end of the extent.  Normally we will free
    4668                 :            :          * blocks at the beginning or the end unless we are explicitly
    4669                 :            :          * requested to avoid doing so.
    4670                 :            :          */
    4671                 :     127589 :         overflow = EXT4_PBLK_COFF(sbi, block);
    4672         [ -  + ]:     127589 :         if (overflow) {
    4673         [ #  # ]:          0 :                 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
    4674                 :          0 :                         overflow = sbi->s_cluster_ratio - overflow;
    4675                 :          0 :                         block += overflow;
    4676         [ #  # ]:          0 :                         if (count > overflow)
    4677                 :          0 :                                 count -= overflow;
    4678                 :            :                         else
    4679                 :            :                                 return;
    4680                 :            :                 } else {
    4681                 :          0 :                         block -= overflow;
    4682                 :          0 :                         count += overflow;
    4683                 :            :                 }
    4684                 :            :         }
    4685                 :     127589 :         overflow = EXT4_LBLK_COFF(sbi, count);
    4686         [ -  + ]:     127589 :         if (overflow) {
    4687         [ #  # ]:          0 :                 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
    4688         [ #  # ]:          0 :                         if (count > overflow)
    4689                 :          0 :                                 count -= overflow;
    4690                 :            :                         else
    4691                 :            :                                 return;
    4692                 :            :                 } else
    4693                 :     127589 :                         count += sbi->s_cluster_ratio - overflow;
    4694                 :            :         }
    4695                 :            : 
    4696                 :            : do_more:
    4697                 :            :         overflow = 0;
    4698                 :     127592 :         ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
    4699                 :            : 
    4700            [ + ]:     127594 :         if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
    4701                 :            :                         ext4_get_group_info(sb, block_group))))
    4702                 :            :                 return;
    4703                 :            : 
    4704                 :            :         /*
    4705                 :            :          * Check to see if we are freeing blocks across a group
    4706                 :            :          * boundary.
    4707                 :            :          */
    4708         [ +  + ]:     127595 :         if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
    4709                 :          3 :                 overflow = EXT4_C2B(sbi, bit) + count -
    4710                 :            :                         EXT4_BLOCKS_PER_GROUP(sb);
    4711                 :          3 :                 count -= overflow;
    4712                 :            :         }
    4713                 :     127595 :         count_clusters = EXT4_NUM_B2C(sbi, count);
    4714                 :     127595 :         bitmap_bh = ext4_read_block_bitmap(sb, block_group);
    4715         [ +  + ]:     127593 :         if (!bitmap_bh) {
    4716                 :            :                 err = -EIO;
    4717                 :            :                 goto error_return;
    4718                 :            :         }
    4719                 :     127592 :         gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
    4720            [ + ]:     127592 :         if (!gdp) {
    4721                 :            :                 err = -EIO;
    4722                 :            :                 goto error_return;
    4723                 :            :         }
    4724                 :            : 
    4725         [ -  + ]:     255185 :         if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
           [ #  #  -  + ]
    4726   [ #  #  +  + ]:     255181 :             in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
    4727         [ +  + ]:     255174 :             in_range(block, ext4_inode_table(sb, gdp),
    4728         [ +  + ]:     127588 :                      EXT4_SB(sb)->s_itb_per_group) ||
    4729            [ + ]:     255174 :             in_range(block + count - 1, ext4_inode_table(sb, gdp),
    4730                 :            :                      EXT4_SB(sb)->s_itb_per_group)) {
    4731                 :            : 
    4732                 :          0 :                 ext4_error(sb, "Freeing blocks in system zone - "
    4733                 :            :                            "Block = %llu, count = %lu", block, count);
    4734                 :            :                 /* err is still 0 here, so ext4_std_error should be a no-op */
    4735                 :          0 :                 goto error_return;
    4736                 :            :         }
    4737                 :            : 
    4738                 :            :         BUFFER_TRACE(bitmap_bh, "getting write access");
    4739                 :     127597 :         err = ext4_journal_get_write_access(handle, bitmap_bh);
    4740         [ +  - ]:     127594 :         if (err)
    4741                 :            :                 goto error_return;
    4742                 :            : 
    4743                 :            :         /*
    4744                 :            :          * We are about to modify some metadata.  Call the journal APIs
    4745                 :            :          * to unshare ->b_data if a currently-committing transaction is
    4746                 :            :          * using it
    4747                 :            :          */
    4748                 :            :         BUFFER_TRACE(gd_bh, "get_write_access");
    4749                 :     127594 :         err = ext4_journal_get_write_access(handle, gd_bh);
    4750         [ +  + ]:     127596 :         if (err)
    4751                 :            :                 goto error_return;
    4752                 :            : #ifdef AGGRESSIVE_CHECK
    4753                 :            :         {
    4754                 :            :                 int i;
    4755                 :            :                 for (i = 0; i < count_clusters; i++)
    4756                 :            :                         BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
    4757                 :            :         }
    4758                 :            : #endif
    4759                 :     127595 :         trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
    4760                 :            : 
    4761                 :     127595 :         err = ext4_mb_load_buddy(sb, block_group, &e4b);
    4762         [ +  - ]:     127594 :         if (err)
    4763                 :            :                 goto error_return;
    4764                 :            : 
    4765    [ +  + ][ + ]:     382783 :         if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
    4766                 :            :                 struct ext4_free_data *new_entry;
    4767                 :            :                 /*
    4768                 :            :                  * Blocks being freed are treated as metadata; they must not
    4769                 :            :                  * be reused until this transaction is committed.
    4770                 :            :                  */
    4771                 :            :         retry:
    4772                 :     127594 :                 new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
    4773         [ -  + ]:     127593 :                 if (!new_entry) {
    4774                 :            :                         /*
    4775                 :            :                          * We use a retry loop because
    4776                 :            :                          * ext4_free_blocks() is not allowed to fail.
    4777                 :            :                          */
    4778                 :          0 :                         cond_resched();
    4779                 :          0 :                         congestion_wait(BLK_RW_ASYNC, HZ/50);
    4780                 :          0 :                         goto retry;
    4781                 :            :                 }
    4782                 :     127593 :                 new_entry->efd_start_cluster = bit;
    4783                 :     127593 :                 new_entry->efd_group = block_group;
    4784                 :     127593 :                 new_entry->efd_count = count_clusters;
    4785                 :     127593 :                 new_entry->efd_tid = handle->h_transaction->t_tid;
    4786                 :            : 
    4787                 :            :                 ext4_lock_group(sb, block_group);
    4788                 :     127595 :                 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
    4789                 :     127596 :                 ext4_mb_free_metadata(handle, &e4b, new_entry);
    4790                 :            :         } else {
    4791                 :            :                 /* need to update group_info->bb_free and bitmap
    4792                 :            :                  * with group lock held. generate_buddy looks at
    4793                 :            :                  * them with the group lock held
    4794                 :            :                  */
    4795         [ #  # ]:          0 :                 if (test_opt(sb, DISCARD)) {
    4796                 :          0 :                         err = ext4_issue_discard(sb, block_group, bit, count);
    4797         [ #  # ]:          0 :                         if (err && err != -EOPNOTSUPP)
    4798                 :          0 :                                 ext4_msg(sb, KERN_WARNING, "discard request in"
    4799                 :            :                                          " group:%d block:%d count:%lu failed"
    4800                 :            :                                          " with %d", block_group, bit, count,
    4801                 :            :                                          err);
    4802                 :            :                 } else
    4803                 :          0 :                         EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
    4804                 :            : 
    4805                 :          0 :                 ext4_lock_group(sb, block_group);
    4806                 :          0 :                 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
    4807                 :          0 :                 mb_free_blocks(inode, &e4b, bit, count_clusters);
    4808                 :            :         }
    4809                 :            : 
    4810                 :     127596 :         ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
    4811                 :     127596 :         ext4_free_group_clusters_set(sb, gdp, ret);
    4812                 :     127595 :         ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
    4813                 :     127596 :         ext4_group_desc_csum_set(sb, block_group, gdp);
    4814                 :     127595 :         ext4_unlock_group(sb, block_group);
    4815                 :            : 
    4816         [ +  + ]:     127596 :         if (sbi->s_log_groups_per_flex) {
    4817                 :     127586 :                 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
    4818                 :     255172 :                 atomic64_add(count_clusters,
    4819                 :     127586 :                              &sbi->s_flex_groups[flex_group].free_clusters);
    4820                 :            :         }
    4821                 :            : 
    4822 [ +  + ][ +  - ]:     127595 :         if (flags & EXT4_FREE_BLOCKS_RESERVE && ei->i_reserved_data_blocks) {
    4823                 :        166 :                 percpu_counter_add(&sbi->s_dirtyclusters_counter,
    4824                 :            :                                    count_clusters);
    4825                 :            :                 spin_lock(&ei->i_block_reservation_lock);
    4826         [ +  - ]:        166 :                 if (flags & EXT4_FREE_BLOCKS_METADATA)
    4827                 :        166 :                         ei->i_reserved_meta_blocks += count_clusters;
    4828                 :            :                 else
    4829                 :          0 :                         ei->i_reserved_data_blocks += count_clusters;
    4830                 :            :                 spin_unlock(&ei->i_block_reservation_lock);
    4831         [ +  - ]:        166 :                 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
    4832                 :        166 :                         dquot_reclaim_block(inode,
    4833                 :        166 :                                         EXT4_C2B(sbi, count_clusters));
    4834         [ +  - ]:     127429 :         } else if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
    4835                 :     127429 :                 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
    4836                 :     127596 :         percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
    4837                 :            : 
    4838                 :     127596 :         ext4_mb_unload_buddy(&e4b);
    4839                 :            : 
    4840                 :            :         /* We dirtied the bitmap block */
    4841                 :            :         BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
    4842                 :     127593 :         err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
    4843                 :            : 
    4844                 :            :         /* And the group descriptor block */
    4845                 :            :         BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
    4846                 :     127592 :         ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
    4847         [ +  - ]:     127596 :         if (!err)
    4848                 :            :                 err = ret;
    4849                 :            : 
    4850         [ +  + ]:     127596 :         if (overflow && !err) {
    4851                 :            :                 block += count;
    4852                 :            :                 count = overflow;
    4853                 :            :                 put_bh(bitmap_bh);
    4854                 :            :                 goto do_more;
    4855                 :            :         }
    4856                 :            : error_return:
    4857                 :            :         brelse(bitmap_bh);
    4858         [ -  + ]:     127592 :         ext4_std_error(sb, err);
    4859                 :            :         return;
    4860                 :            : }
    4861                 :            : 
    4862                 :            : /**
    4863                 :            :  * ext4_group_add_blocks() -- Add given blocks to an existing group
    4864                 :            :  * @handle:                     handle to this transaction
    4865                 :            :  * @sb:                         super block
    4866                 :            :  * @block:                      start physical block to add to the block group
    4867                 :            :  * @count:                      number of blocks to add
    4868                 :            :  *
    4869                 :            :  * Marks the blocks free in the bitmap and buddy; returns 0 or an error code.
    4870                 :            :  */
    4871                 :          0 : int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
    4872                 :            :                          ext4_fsblk_t block, unsigned long count)
    4873                 :            : {
    4874                 :            :         struct buffer_head *bitmap_bh = NULL;
    4875                 :            :         struct buffer_head *gd_bh;
    4876                 :            :         ext4_group_t block_group;
    4877                 :            :         ext4_grpblk_t bit;
    4878                 :            :         unsigned int i;
    4879                 :            :         struct ext4_group_desc *desc;
    4880                 :            :         struct ext4_sb_info *sbi = EXT4_SB(sb);
    4881                 :            :         struct ext4_buddy e4b;
    4882                 :            :         int err = 0, ret, blk_free_count;
    4883                 :            :         ext4_grpblk_t blocks_freed;
    4884                 :            : 
    4885                 :            :         ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
    4886                 :            : 
    4887         [ #  # ]:          0 :         if (count == 0)
    4888                 :            :                 return 0;
    4889                 :            : 
    4890                 :          0 :         ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
    4891                 :            :         /*
    4892                 :            :          * Refuse to add blocks that would extend past the end of
    4893                 :            :          * this block group.
    4894                 :            :          */
    4895         [ #  # ]:          0 :         if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
    4896                 :          0 :                 ext4_warning(sb, "too much blocks added to group %u\n",
    4897                 :            :                              block_group);
    4898                 :            :                 err = -EINVAL;
    4899                 :          0 :                 goto error_return;
    4900                 :            :         }
    4901                 :            : 
    4902                 :          0 :         bitmap_bh = ext4_read_block_bitmap(sb, block_group);
    4903         [ #  # ]:          0 :         if (!bitmap_bh) {
    4904                 :            :                 err = -EIO;
    4905                 :            :                 goto error_return;
    4906                 :            :         }
    4907                 :            : 
    4908                 :          0 :         desc = ext4_get_group_desc(sb, block_group, &gd_bh);
    4909         [ #  # ]:          0 :         if (!desc) {
    4910                 :            :                 err = -EIO;
    4911                 :            :                 goto error_return;
    4912                 :            :         }
    4913                 :            : 
    4914         [ #  # ]:          0 :         if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
           [ #  #  #  # ]
    4915   [ #  #  #  # ]:          0 :             in_range(ext4_inode_bitmap(sb, desc), block, count) ||
    4916   [ #  #  #  # ]:          0 :             in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
    4917         [ #  # ]:          0 :             in_range(block + count - 1, ext4_inode_table(sb, desc),
    4918                 :            :                      sbi->s_itb_per_group)) {
    4919                 :          0 :                 ext4_error(sb, "Adding blocks in system zones - "
    4920                 :            :                            "Block = %llu, count = %lu",
    4921                 :            :                            block, count);
    4922                 :            :                 err = -EINVAL;
    4923                 :          0 :                 goto error_return;
    4924                 :            :         }
    4925                 :            : 
    4926                 :            :         BUFFER_TRACE(bitmap_bh, "getting write access");
    4927                 :          0 :         err = ext4_journal_get_write_access(handle, bitmap_bh);
    4928         [ #  # ]:          0 :         if (err)
    4929                 :            :                 goto error_return;
    4930                 :            : 
    4931                 :            :         /*
    4932                 :            :          * We are about to modify some metadata.  Call the journal APIs
    4933                 :            :          * to unshare ->b_data if a currently-committing transaction is
    4934                 :            :          * using it
    4935                 :            :          */
    4936                 :            :         BUFFER_TRACE(gd_bh, "get_write_access");
    4937                 :          0 :         err = ext4_journal_get_write_access(handle, gd_bh);
    4938         [ #  # ]:          0 :         if (err)
    4939                 :            :                 goto error_return;
    4940                 :            : 
    4941         [ #  # ]:          0 :         for (i = 0, blocks_freed = 0; i < count; i++) {
    4942                 :            :                 BUFFER_TRACE(bitmap_bh, "clear bit");
    4943         [ #  # ]:          0 :                 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
    4944                 :          0 :                         ext4_error(sb, "bit already cleared for block %llu",
    4945                 :            :                                    (ext4_fsblk_t)(block + i));
    4946                 :            :                         BUFFER_TRACE(bitmap_bh, "bit already cleared");
    4947                 :            :                 } else {
    4948                 :          0 :                         blocks_freed++;
    4949                 :            :                 }
    4950                 :            :         }
    4951                 :            : 
    4952                 :          0 :         err = ext4_mb_load_buddy(sb, block_group, &e4b);
    4953         [ #  # ]:          0 :         if (err)
    4954                 :            :                 goto error_return;
    4955                 :            : 
    4956                 :            :         /*
    4957                 :            :          * need to update group_info->bb_free and bitmap
    4958                 :            :          * with group lock held. generate_buddy looks at
    4959                 :            :          * them with the group lock held
    4960                 :            :          */
    4961                 :          0 :         ext4_lock_group(sb, block_group);
    4962                 :          0 :         mb_clear_bits(bitmap_bh->b_data, bit, count);
    4963                 :          0 :         mb_free_blocks(NULL, &e4b, bit, count);
    4964                 :          0 :         blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
    4965                 :          0 :         ext4_free_group_clusters_set(sb, desc, blk_free_count);
    4966                 :          0 :         ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
    4967                 :          0 :         ext4_group_desc_csum_set(sb, block_group, desc);
    4968                 :          0 :         ext4_unlock_group(sb, block_group);
    4969                 :          0 :         percpu_counter_add(&sbi->s_freeclusters_counter,
    4970                 :          0 :                            EXT4_NUM_B2C(sbi, blocks_freed));
    4971                 :            : 
    4972         [ #  # ]:          0 :         if (sbi->s_log_groups_per_flex) {
    4973                 :          0 :                 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
    4974                 :          0 :                 atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed),
    4975                 :          0 :                              &sbi->s_flex_groups[flex_group].free_clusters);
    4976                 :            :         }
    4977                 :            : 
    4978                 :          0 :         ext4_mb_unload_buddy(&e4b);
    4979                 :            : 
    4980                 :            :         /* We dirtied the bitmap block */
    4981                 :            :         BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
    4982                 :          0 :         err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
    4983                 :            : 
    4984                 :            :         /* And the group descriptor block */
    4985                 :            :         BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
    4986                 :          0 :         ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
    4987         [ #  # ]:          0 :         if (!err)
    4988                 :            :                 err = ret;
    4989                 :            : 
    4990                 :            : error_return:
    4991                 :            :         brelse(bitmap_bh);
    4992         [ #  # ]:          0 :         ext4_std_error(sb, err);
    4993                 :          0 :         return err;
    4994                 :            : }
    4995                 :            : 
    4996                 :            : /**
    4997                 :            :  * ext4_trim_extent -- function to TRIM one single free extent in the group
    4998                 :            :  * @sb:         super block for the file system
    4999                 :            :  * @start:      starting block of the free extent in the alloc. group
    5000                 :            :  * @count:      number of blocks to TRIM
    5001                 :            :  * @group:      alloc. group we are working with
    5002                 :            :  * @e4b:        ext4 buddy for the group
    5003                 :            :  *
    5004                 :            :  * Trim "count" blocks starting at "start" in the "group". To assure that no
    5005                 :            :  * one will allocate those blocks, mark it as used in buddy bitmap. This must
    5006                 :            :  * be called with under the group lock.
    5007                 :            :  */
    5008                 :          0 : static int ext4_trim_extent(struct super_block *sb, int start, int count,
    5009                 :            :                              ext4_group_t group, struct ext4_buddy *e4b)
    5010                 :            : {
    5011                 :            :         struct ext4_free_extent ex;
    5012                 :            :         int ret = 0;
    5013                 :            : 
    5014                 :            :         trace_ext4_trim_extent(sb, group, start, count);
    5015                 :            : 
    5016         [ #  # ]:          0 :         assert_spin_locked(ext4_group_lock_ptr(sb, group));
    5017                 :            : 
    5018                 :          0 :         ex.fe_start = start;
    5019                 :          0 :         ex.fe_group = group;
    5020                 :          0 :         ex.fe_len = count;
    5021                 :            : 
    5022                 :            :         /*
    5023                 :            :          * Mark blocks used, so no one can reuse them while
    5024                 :            :          * being trimmed.
    5025                 :            :          */
    5026                 :          0 :         mb_mark_used(e4b, &ex);
    5027                 :            :         ext4_unlock_group(sb, group);
    5028                 :            :         ret = ext4_issue_discard(sb, group, start, count);
    5029                 :            :         ext4_lock_group(sb, group);
    5030                 :          0 :         mb_free_blocks(NULL, e4b, start, ex.fe_len);
    5031                 :          0 :         return ret;
    5032                 :            : }
    5033                 :            : 
    5034                 :            : /**
    5035                 :            :  * ext4_trim_all_free -- function to trim all free space in alloc. group
    5036                 :            :  * @sb:                 super block for file system
    5037                 :            :  * @group:              group to be trimmed
    5038                 :            :  * @start:              first group block to examine
    5039                 :            :  * @max:                last group block to examine
    5040                 :            :  * @minblocks:          minimum extent block count
    5041                 :            :  *
    5042                 :            :  * ext4_trim_all_free walks through group's buddy bitmap searching for free
    5043                 :            :  * extents. When the free block is found, ext4_trim_extent is called to TRIM
    5044                 :            :  * the extent.
    5045                 :            :  *
    5046                 :            :  *
    5047                 :            :  * ext4_trim_all_free walks through group's block bitmap searching for free
    5048                 :            :  * extents. When the free extent is found, mark it as used in group buddy
    5049                 :            :  * bitmap. Then issue a TRIM command on this extent and free the extent in
    5050                 :            :  * the group buddy bitmap. This is done until whole group is scanned.
    5051                 :            :  */
    5052                 :            : static ext4_grpblk_t
    5053                 :          0 : ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
    5054                 :            :                    ext4_grpblk_t start, ext4_grpblk_t max,
    5055                 :            :                    ext4_grpblk_t minblocks)
    5056                 :            : {
    5057                 :            :         void *bitmap;
    5058                 :            :         ext4_grpblk_t next, count = 0, free_count = 0;
    5059                 :            :         struct ext4_buddy e4b;
    5060                 :            :         int ret = 0;
    5061                 :            : 
    5062                 :            :         trace_ext4_trim_all_free(sb, group, start, max);
    5063                 :            : 
    5064                 :          0 :         ret = ext4_mb_load_buddy(sb, group, &e4b);
    5065         [ #  # ]:          0 :         if (ret) {
    5066                 :          0 :                 ext4_error(sb, "Error in loading buddy "
    5067                 :            :                                 "information for %u", group);
    5068                 :          0 :                 return ret;
    5069                 :            :         }
    5070                 :          0 :         bitmap = e4b.bd_bitmap;
    5071                 :            : 
    5072                 :            :         ext4_lock_group(sb, group);
    5073 [ #  # ][ #  # ]:          0 :         if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
    5074                 :          0 :             minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
    5075                 :            :                 goto out;
    5076                 :            : 
    5077                 :          0 :         start = (e4b.bd_info->bb_first_free > start) ?
    5078                 :          0 :                 e4b.bd_info->bb_first_free : start;
    5079                 :            : 
    5080         [ #  # ]:          0 :         while (start <= max) {
    5081                 :          0 :                 start = mb_find_next_zero_bit(bitmap, max + 1, start);
    5082         [ #  # ]:          0 :                 if (start > max)
    5083                 :            :                         break;
    5084                 :            :                 next = mb_find_next_bit(bitmap, max + 1, start);
    5085                 :            : 
    5086         [ #  # ]:          0 :                 if ((next - start) >= minblocks) {
    5087                 :          0 :                         ret = ext4_trim_extent(sb, start,
    5088                 :            :                                                next - start, group, &e4b);
    5089         [ #  # ]:          0 :                         if (ret && ret != -EOPNOTSUPP)
    5090                 :            :                                 break;
    5091                 :            :                         ret = 0;
    5092                 :          0 :                         count += next - start;
    5093                 :            :                 }
    5094                 :          0 :                 free_count += next - start;
    5095                 :          0 :                 start = next + 1;
    5096                 :            : 
    5097         [ #  # ]:          0 :                 if (fatal_signal_pending(current)) {
    5098                 :            :                         count = -ERESTARTSYS;
    5099                 :            :                         break;
    5100                 :            :                 }
    5101                 :            : 
    5102         [ #  # ]:          0 :                 if (need_resched()) {
    5103                 :            :                         ext4_unlock_group(sb, group);
    5104                 :          0 :                         cond_resched();
    5105                 :            :                         ext4_lock_group(sb, group);
    5106                 :            :                 }
    5107                 :            : 
    5108         [ #  # ]:          0 :                 if ((e4b.bd_info->bb_free - free_count) < minblocks)
    5109                 :            :                         break;
    5110                 :            :         }
    5111                 :            : 
    5112         [ #  # ]:          0 :         if (!ret) {
    5113                 :            :                 ret = count;
    5114                 :          0 :                 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
    5115                 :            :         }
    5116                 :            : out:
    5117                 :            :         ext4_unlock_group(sb, group);
    5118                 :          0 :         ext4_mb_unload_buddy(&e4b);
    5119                 :            : 
    5120                 :            :         ext4_debug("trimmed %d blocks in the group %d\n",
    5121                 :            :                 count, group);
    5122                 :            : 
    5123                 :          0 :         return ret;
    5124                 :            : }
    5125                 :            : 
    5126                 :            : /**
    5127                 :            :  * ext4_trim_fs() -- trim ioctl handle function
    5128                 :            :  * @sb:                 superblock for filesystem
    5129                 :            :  * @range:              fstrim_range structure
    5130                 :            :  *
    5131                 :            :  * start:       First Byte to trim
    5132                 :            :  * len:         number of Bytes to trim from start
    5133                 :            :  * minlen:      minimum extent length in Bytes
    5134                 :            :  * ext4_trim_fs goes through all allocation groups containing Bytes from
    5135                 :            :  * start to start+len. For each such a group ext4_trim_all_free function
    5136                 :            :  * is invoked to trim all free space.
    5137                 :            :  */
    5138                 :          0 : int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
    5139                 :            : {
    5140                 :            :         struct ext4_group_info *grp;
    5141                 :            :         ext4_group_t group, first_group, last_group;
    5142                 :            :         ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
    5143                 :            :         uint64_t start, end, minlen, trimmed = 0;
    5144                 :          0 :         ext4_fsblk_t first_data_blk =
    5145                 :          0 :                         le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
    5146                 :          0 :         ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
    5147                 :            :         int ret = 0;
    5148                 :            : 
    5149                 :          0 :         start = range->start >> sb->s_blocksize_bits;
    5150                 :          0 :         end = start + (range->len >> sb->s_blocksize_bits) - 1;
    5151                 :          0 :         minlen = EXT4_NUM_B2C(EXT4_SB(sb),
    5152                 :            :                               range->minlen >> sb->s_blocksize_bits);
    5153                 :            : 
    5154 [ #  # ][ #  # ]:          0 :         if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
    5155         [ #  # ]:          0 :             start >= max_blks ||
    5156                 :          0 :             range->len < sb->s_blocksize)
    5157                 :            :                 return -EINVAL;
    5158         [ #  # ]:          0 :         if (end >= max_blks)
    5159                 :          0 :                 end = max_blks - 1;
    5160         [ #  # ]:          0 :         if (end <= first_data_blk)
    5161                 :            :                 goto out;
    5162         [ #  # ]:          0 :         if (start < first_data_blk)
    5163                 :            :                 start = first_data_blk;
    5164                 :            : 
    5165                 :            :         /* Determine first and last group to examine based on start and end */
    5166                 :          0 :         ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
    5167                 :            :                                      &first_group, &first_cluster);
    5168                 :          0 :         ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
    5169                 :            :                                      &last_group, &last_cluster);
    5170                 :            : 
    5171                 :            :         /* end now represents the last cluster to discard in this group */
    5172                 :          0 :         end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
    5173                 :            : 
    5174         [ #  # ]:          0 :         for (group = first_group; group <= last_group; group++) {
    5175                 :            :                 grp = ext4_get_group_info(sb, group);
    5176                 :            :                 /* We only do this if the grp has never been initialized */
    5177         [ #  # ]:          0 :                 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
    5178                 :          0 :                         ret = ext4_mb_init_group(sb, group);
    5179         [ #  # ]:          0 :                         if (ret)
    5180                 :            :                                 break;
    5181                 :            :                 }
    5182                 :            : 
    5183                 :            :                 /*
    5184                 :            :                  * For all the groups except the last one, last cluster will
    5185                 :            :                  * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to
    5186                 :            :                  * change it for the last group, note that last_cluster is
    5187                 :            :                  * already computed earlier by ext4_get_group_no_and_offset()
    5188                 :            :                  */
    5189         [ #  # ]:          0 :                 if (group == last_group)
    5190                 :          0 :                         end = last_cluster;
    5191                 :            : 
    5192         [ #  # ]:          0 :                 if (grp->bb_free >= minlen) {
    5193                 :          0 :                         cnt = ext4_trim_all_free(sb, group, first_cluster,
    5194                 :            :                                                 end, minlen);
    5195         [ #  # ]:          0 :                         if (cnt < 0) {
    5196                 :            :                                 ret = cnt;
    5197                 :            :                                 break;
    5198                 :            :                         }
    5199                 :          0 :                         trimmed += cnt;
    5200                 :            :                 }
    5201                 :            : 
    5202                 :            :                 /*
    5203                 :            :                  * For every group except the first one, we are sure
    5204                 :            :                  * that the first cluster to discard will be cluster #0.
    5205                 :            :                  */
    5206                 :          0 :                 first_cluster = 0;
    5207                 :            :         }
    5208                 :            : 
    5209         [ #  # ]:          0 :         if (!ret)
    5210                 :          0 :                 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
    5211                 :            : 
    5212                 :            : out:
    5213                 :          0 :         range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
    5214                 :          0 :         return ret;
    5215                 :            : }

Generated by: LCOV version 1.9