Branch data Line data Source code
1 : : /*
2 : : * linux/fs/buffer.c
3 : : *
4 : : * Copyright (C) 1991, 1992, 2002 Linus Torvalds
5 : : */
6 : :
7 : : /*
8 : : * Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95
9 : : *
10 : : * Removed a lot of unnecessary code and simplified things now that
11 : : * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
12 : : *
13 : : * Speed up hash, lru, and free list operations. Use gfp() for allocating
14 : : * hash table, use SLAB cache for buffer heads. SMP threading. -DaveM
15 : : *
16 : : * Added 32k buffer block sizes - these are required for older ARM systems. - RMK
17 : : *
18 : : * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de>
19 : : */
20 : :
21 : : #include <linux/kernel.h>
22 : : #include <linux/syscalls.h>
23 : : #include <linux/fs.h>
24 : : #include <linux/mm.h>
25 : : #include <linux/percpu.h>
26 : : #include <linux/slab.h>
27 : : #include <linux/capability.h>
28 : : #include <linux/blkdev.h>
29 : : #include <linux/file.h>
30 : : #include <linux/quotaops.h>
31 : : #include <linux/highmem.h>
32 : : #include <linux/export.h>
33 : : #include <linux/writeback.h>
34 : : #include <linux/hash.h>
35 : : #include <linux/suspend.h>
36 : : #include <linux/buffer_head.h>
37 : : #include <linux/task_io_accounting_ops.h>
38 : : #include <linux/bio.h>
39 : : #include <linux/notifier.h>
40 : : #include <linux/cpu.h>
41 : : #include <linux/bitops.h>
42 : : #include <linux/mpage.h>
43 : : #include <linux/bit_spinlock.h>
44 : : #include <trace/events/block.h>
45 : :
46 : : static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
47 : :
48 : : #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
49 : :
50 : 0 : void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
51 : : {
52 : 55318 : bh->b_end_io = handler;
53 : 55318 : bh->b_private = private;
54 : 0 : }
55 : : EXPORT_SYMBOL(init_buffer);
56 : :
57 : 0 : inline void touch_buffer(struct buffer_head *bh)
58 : : {
59 : : trace_block_touch_buffer(bh);
60 : 10867480 : mark_page_accessed(bh->b_page);
61 : 0 : }
62 : : EXPORT_SYMBOL(touch_buffer);
63 : :
64 : 0 : static int sleep_on_buffer(void *word)
65 : : {
66 : 41447 : io_schedule();
67 : 41447 : return 0;
68 : : }
69 : :
70 : 0 : void __lock_buffer(struct buffer_head *bh)
71 : : {
72 : 17593 : wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
73 : : TASK_UNINTERRUPTIBLE);
74 : 0 : }
75 : : EXPORT_SYMBOL(__lock_buffer);
76 : :
77 : 0 : void unlock_buffer(struct buffer_head *bh)
78 : : {
79 : 14389572 : clear_bit_unlock(BH_Lock, &bh->b_state);
80 : 14394799 : smp_mb__after_clear_bit();
81 : 14390937 : wake_up_bit(&bh->b_state, BH_Lock);
82 : 14388812 : }
83 : : EXPORT_SYMBOL(unlock_buffer);
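
A minimal caller-side sketch (assuming the usual lock_buffer() wrapper from buffer_head.h; example_with_buffer_locked is an illustrative name, not part of this file) of how the lock/unlock primitives above pair up:

static void example_with_buffer_locked(struct buffer_head *bh)
{
	lock_buffer(bh);		/* may sleep in __lock_buffer() above */
	/* ... inspect or modify bh->b_data while the buffer is held ... */
	unlock_buffer(bh);		/* wakes sleepers in __wait_on_buffer() */
}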
84 : :
85 : : /*
86 : : * Returns whether the page has dirty or writeback buffers. If all the
87 : : * buffers are unlocked and clean then the PageDirty information is stale.
88 : : * If any of the buffers are locked, it is assumed they are locked for IO.
89 : : */
90 : 0 : void buffer_check_dirty_writeback(struct page *page,
91 : : bool *dirty, bool *writeback)
92 : : {
93 : : struct buffer_head *head, *bh;
94 : 51734 : *dirty = false;
95 : 51734 : *writeback = false;
96 : :
97 [ - + ]: 51734 : BUG_ON(!PageLocked(page));
98 : :
99 [ + ]: 51734 : if (!page_has_buffers(page))
100 : 0 : return;
101 : :
102 [ + + ]: 51735 : if (PageWriteback(page))
103 : 1033 : *writeback = true;
104 : :
105 : : head = page_buffers(page);
106 : : bh = head;
107 : : do {
108 [ + + ]: 51735 : if (buffer_locked(bh))
109 : 1033 : *writeback = true;
110 : :
111 [ + + ]: 51735 : if (buffer_dirty(bh))
112 : 6 : *dirty = true;
113 : :
114 : 51735 : bh = bh->b_this_page;
115 [ - + ]: 51735 : } while (bh != head);
116 : : }
117 : : EXPORT_SYMBOL(buffer_check_dirty_writeback);
118 : :
119 : : /*
120 : : * Block until a buffer comes unlocked. This doesn't stop it
121 : : * from becoming locked again - you have to lock it yourself
122 : : * if you want to preserve its state.
123 : : */
124 : 0 : void __wait_on_buffer(struct buffer_head * bh)
125 : : {
126 : 39641 : wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
127 : 0 : }
128 : : EXPORT_SYMBOL(__wait_on_buffer);
129 : :
130 : : static void
131 : 0 : __clear_page_buffers(struct page *page)
132 : : {
133 : : ClearPagePrivate(page);
134 : 1823844 : set_page_private(page, 0);
135 : 1823844 : page_cache_release(page);
136 : 1823796 : }
137 : :
138 : :
139 : 0 : static int quiet_error(struct buffer_head *bh)
140 : : {
141 [ # # ][ # # ]: 0 : if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
142 : : return 0;
143 : : return 1;
144 : : }
145 : :
146 : :
147 : 0 : static void buffer_io_error(struct buffer_head *bh)
148 : : {
149 : : char b[BDEVNAME_SIZE];
150 : 0 : printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
151 : : bdevname(bh->b_bdev, b),
152 : : (unsigned long long)bh->b_blocknr);
153 : 0 : }
154 : :
155 : : /*
156 : : * End-of-IO handler helper function which does not touch the bh after
157 : : * unlocking it.
158 : : * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
159 : : * a race there is benign: unlock_buffer() only uses the bh's address for
160 : : * hashing after unlocking the buffer, so it doesn't actually touch the bh
161 : : * itself.
162 : : */
163 : 0 : static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
164 : : {
165 [ + - ]: 7473 : if (uptodate) {
166 : : set_buffer_uptodate(bh);
167 : : } else {
168 : : /* This happens due to failed READA attempts. */
169 : : clear_buffer_uptodate(bh);
170 : : }
171 : 7473 : unlock_buffer(bh);
172 : 7473 : }
173 : :
174 : : /*
175 : : * Default synchronous end-of-IO handler.. Just mark it up-to-date and
176 : : * unlock the buffer. This is what ll_rw_block uses too.
177 : : */
178 : 0 : void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
179 : : {
180 : 7473 : __end_buffer_read_notouch(bh, uptodate);
181 : : put_bh(bh);
182 : 7473 : }
183 : : EXPORT_SYMBOL(end_buffer_read_sync);
184 : :
185 : 0 : void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
186 : : {
187 : : char b[BDEVNAME_SIZE];
188 : :
189 [ + - ]: 632 : if (uptodate) {
190 : : set_buffer_uptodate(bh);
191 : : } else {
192 [ # # ]: 0 : if (!quiet_error(bh)) {
193 : 0 : buffer_io_error(bh);
194 : 0 : printk(KERN_WARNING "lost page write due to "
195 : : "I/O error on %s\n",
196 : : bdevname(bh->b_bdev, b));
197 : : }
198 : : set_buffer_write_io_error(bh);
199 : : clear_buffer_uptodate(bh);
200 : : }
201 : 632 : unlock_buffer(bh);
202 : : put_bh(bh);
203 : 632 : }
204 : : EXPORT_SYMBOL(end_buffer_write_sync);
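
A hedged sketch of how a caller drives a synchronous single-buffer read with end_buffer_read_sync(); this is essentially the pattern __bread_slow() uses further down, and read_bh_sync is an illustrative name only:

static int read_bh_sync(struct buffer_head *bh)
{
	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);	/* already valid, no I/O needed */
		return 0;
	}
	get_bh(bh);			/* reference is dropped by end_buffer_read_sync() */
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(READ, bh);
	wait_on_buffer(bh);
	return buffer_uptodate(bh) ? 0 : -EIO;
}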
205 : :
206 : : /*
207 : : * Various filesystems appear to want __find_get_block to be non-blocking.
208 : : * But it's the page lock which protects the buffers. To get around this,
209 : : * we get exclusion from try_to_free_buffers with the blockdev mapping's
210 : : * private_lock.
211 : : *
212 : : * Hack idea: for the blockdev mapping, i_bufferlist_lock contention
213 : : * may be quite high. This code could TryLock the page, and if that
214 : : * succeeds, there is no need to take private_lock. (But if
215 : : * private_lock is contended then so is mapping->tree_lock).
216 : : */
217 : : static struct buffer_head *
218 : 0 : __find_get_block_slow(struct block_device *bdev, sector_t block)
219 : : {
220 : 2864563 : struct inode *bd_inode = bdev->bd_inode;
221 : 2864563 : struct address_space *bd_mapping = bd_inode->i_mapping;
222 : : struct buffer_head *ret = NULL;
223 : : pgoff_t index;
224 : : struct buffer_head *bh;
225 : : struct buffer_head *head;
226 : : struct page *page;
227 : : int all_mapped = 1;
228 : :
229 : 2864563 : index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
230 : 2864563 : page = find_get_page(bd_mapping, index);
231 [ + + ]: 2864154 : if (!page)
232 : : goto out;
233 : :
234 : : spin_lock(&bd_mapping->private_lock);
235 [ + ]: 237965 : if (!page_has_buffers(page))
236 : : goto out_unlock;
237 [ - + ]: 3102525 : head = page_buffers(page);
238 : : bh = head;
239 : : do {
240 [ + - ]: 238205 : if (!buffer_mapped(bh))
241 : : all_mapped = 0;
242 [ + + ]: 238205 : else if (bh->b_blocknr == block) {
243 : : ret = bh;
244 : : get_bh(bh);
245 : : goto out_unlock;
246 : : }
247 : 243 : bh = bh->b_this_page;
248 [ + - ]: 243 : } while (bh != head);
249 : :
250 : : /* we might be here because some of the buffers on this page are
251 : : * not mapped. This is due to various races between
252 : : * file io on the block device and getblk. It gets dealt with
253 : : * elsewhere, don't buffer_error if we had some unmapped buffers
254 : : */
255 [ # # ]: 0 : if (all_mapped) {
256 : : char b[BDEVNAME_SIZE];
257 : :
258 : 0 : printk("__find_get_block_slow() failed. "
259 : : "block=%llu, b_blocknr=%llu\n",
260 : : (unsigned long long)block,
261 : : (unsigned long long)bh->b_blocknr);
262 : 0 : printk("b_state=0x%08lx, b_size=%zu\n",
263 : : bh->b_state, bh->b_size);
264 : 0 : printk("device %s blocksize: %d\n", bdevname(bdev, b),
265 : 0 : 1 << bd_inode->i_blkbits);
266 : : }
267 : : out_unlock:
268 : : spin_unlock(&bd_mapping->private_lock);
269 : 237965 : page_cache_release(page);
270 : : out:
271 : 2864191 : return ret;
272 : : }
273 : :
274 : : /*
275 : : * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
276 : : */
277 : 0 : static void free_more_memory(void)
278 : : {
279 : : struct zone *zone;
280 : : int nid;
281 : :
282 : 0 : wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
283 : 0 : yield();
284 : :
285 [ # # ]: 0 : for_each_online_node(nid) {
286 : : (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
287 : : gfp_zone(GFP_NOFS), NULL,
288 : : &zone);
289 [ # # ]: 0 : if (zone)
290 : 0 : try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
291 : : GFP_NOFS, NULL);
292 : : }
293 : 0 : }
294 : :
295 : : /*
296 : : * I/O completion handler for block_read_full_page() - pages
297 : : * which come unlocked at the end of I/O.
298 : : */
299 : 0 : static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
300 : : {
301 : : unsigned long flags;
302 : : struct buffer_head *first;
303 : : struct buffer_head *tmp;
304 : : struct page *page;
305 : : int page_uptodate = 1;
306 : :
307 [ - + ]: 2360 : BUG_ON(!buffer_async_read(bh));
308 : :
309 : 2360 : page = bh->b_page;
310 [ + - ]: 2360 : if (uptodate) {
311 : : set_buffer_uptodate(bh);
312 : : } else {
313 : : clear_buffer_uptodate(bh);
314 [ # # ]: 0 : if (!quiet_error(bh))
315 : 0 : buffer_io_error(bh);
316 : : SetPageError(page);
317 : : }
318 : :
319 : : /*
320 : : * Be _very_ careful from here on. Bad things can happen if
321 : : * two buffer heads end IO at almost the same time and both
322 : : * decide that the page is now completely done.
323 : : */
324 [ - + ]: 2360 : first = page_buffers(page);
325 : : local_irq_save(flags);
326 : 2360 : bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
327 : : clear_buffer_async_read(bh);
328 : 2360 : unlock_buffer(bh);
329 : : tmp = bh;
330 : : do {
331 [ + + ]: 6940 : if (!buffer_uptodate(tmp))
332 : : page_uptodate = 0;
333 [ + + ]: 4580 : if (buffer_async_read(tmp)) {
334 [ - + ]: 1084 : BUG_ON(!buffer_locked(tmp));
335 : : goto still_busy;
336 : : }
337 : 3496 : tmp = tmp->b_this_page;
338 [ + + ]: 3496 : } while (tmp != bh);
339 : : bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
340 [ - + ]: 1276 : local_irq_restore(flags);
341 : :
342 : : /*
343 : : * If none of the buffers had errors and they are all
344 : : * uptodate then we can set the page uptodate.
345 : : */
346 [ + - ][ + - ]: 1276 : if (page_uptodate && !PageError(page))
347 : : SetPageUptodate(page);
348 : 1276 : unlock_page(page);
349 : 1276 : return;
350 : :
351 : : still_busy:
352 : : bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
353 [ - + ]: 1084 : local_irq_restore(flags);
354 : : return;
355 : : }
356 : :
357 : : /*
358 : : * Completion handler for block_write_full_page() - pages which are unlocked
359 : : * during I/O, and which have PageWriteback cleared upon I/O completion.
360 : : */
361 : 0 : void end_buffer_async_write(struct buffer_head *bh, int uptodate)
362 : : {
363 : : char b[BDEVNAME_SIZE];
364 : : unsigned long flags;
365 : : struct buffer_head *first;
366 : : struct buffer_head *tmp;
367 : : struct page *page;
368 : :
369 [ - + ]: 18071 : BUG_ON(!buffer_async_write(bh));
370 : :
371 : 18071 : page = bh->b_page;
372 [ + - ]: 18071 : if (uptodate) {
373 : : set_buffer_uptodate(bh);
374 : : } else {
375 [ # # ]: 0 : if (!quiet_error(bh)) {
376 : 0 : buffer_io_error(bh);
377 : 0 : printk(KERN_WARNING "lost page write due to "
378 : : "I/O error on %s\n",
379 : : bdevname(bh->b_bdev, b));
380 : : }
381 : 0 : set_bit(AS_EIO, &page->mapping->flags);
382 : : set_buffer_write_io_error(bh);
383 : : clear_buffer_uptodate(bh);
384 : : SetPageError(page);
385 : : }
386 : :
387 [ - + ]: 18071 : first = page_buffers(page);
388 : : local_irq_save(flags);
389 : 18071 : bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
390 : :
391 : : clear_buffer_async_write(bh);
392 : 18071 : unlock_buffer(bh);
393 : 18071 : tmp = bh->b_this_page;
394 [ + + ]: 18206 : while (tmp != bh) {
395 [ + + ]: 162 : if (buffer_async_write(tmp)) {
396 [ - + ]: 27 : BUG_ON(!buffer_locked(tmp));
397 : : goto still_busy;
398 : : }
399 : 135 : tmp = tmp->b_this_page;
400 : : }
401 : : bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
402 [ - + ]: 18044 : local_irq_restore(flags);
403 : 18044 : end_page_writeback(page);
404 : 18044 : return;
405 : :
406 : : still_busy:
407 : : bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
408 [ - + ]: 27 : local_irq_restore(flags);
409 : : return;
410 : : }
411 : : EXPORT_SYMBOL(end_buffer_async_write);
412 : :
413 : : /*
414 : : * If a page's buffers are under async read-in (end_buffer_async_read
415 : : * completion) then there is a possibility that another thread of
416 : : * control could lock one of the buffers after it has completed
417 : : * but while some of the other buffers have not completed. This
418 : : * locked buffer would confuse end_buffer_async_read() into not unlocking
419 : : * the page. So the absence of BH_Async_Read tells end_buffer_async_read()
420 : : * that this buffer is not under async I/O.
421 : : *
422 : : * The page comes unlocked when it has no locked buffer_async buffers
423 : : * left.
424 : : *
425 : : * PageLocked prevents anyone from starting new async I/O reads against
426 : : * any of the buffers.
427 : : *
428 : : * PageWriteback is used to prevent simultaneous writeout of the same
429 : : * page.
430 : : *
431 : : * PageLocked prevents anyone from starting writeback of a page which is
432 : : * under read I/O (PageWriteback is only ever set against a locked page).
433 : : */
434 : : static void mark_buffer_async_read(struct buffer_head *bh)
435 : : {
436 : 2360 : bh->b_end_io = end_buffer_async_read;
437 : : set_buffer_async_read(bh);
438 : : }
439 : :
440 : : static void mark_buffer_async_write_endio(struct buffer_head *bh,
441 : : bh_end_io_t *handler)
442 : : {
443 : 18071 : bh->b_end_io = handler;
444 : : set_buffer_async_write(bh);
445 : : }
446 : :
447 : 0 : void mark_buffer_async_write(struct buffer_head *bh)
448 : : {
449 : : mark_buffer_async_write_endio(bh, end_buffer_async_write);
450 : 0 : }
451 : : EXPORT_SYMBOL(mark_buffer_async_write);
452 : :
453 : :
454 : : /*
455 : : * fs/buffer.c contains helper functions for buffer-backed address space's
456 : : * fsync functions. A common requirement for buffer-based filesystems is
457 : : * that certain data from the backing blockdev needs to be written out for
458 : : * a successful fsync(). For example, ext2 indirect blocks need to be
459 : : * written back and waited upon before fsync() returns.
460 : : *
461 : : * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(),
462 : : * inode_has_buffers() and invalidate_inode_buffers() are provided for the
463 : : * management of a list of dependent buffers at ->i_mapping->private_list.
464 : : *
465 : : * Locking is a little subtle: try_to_free_buffers() will remove buffers
466 : : * from their controlling inode's queue when they are being freed. But
467 : : * try_to_free_buffers() will be operating against the *blockdev* mapping
468 : : * at the time, not against the S_ISREG file which depends on those buffers.
469 : : * So the locking for private_list is via the private_lock in the address_space
470 : : * which backs the buffers. Which is different from the address_space
471 : : * against which the buffers are listed. So for a particular address_space,
472 : : * mapping->private_lock does *not* protect mapping->private_list! In fact,
473 : : * mapping->private_list will always be protected by the backing blockdev's
474 : : * ->private_lock.
475 : : *
476 : : * Which introduces a requirement: all buffers on an address_space's
477 : : * ->private_list must be from the same address_space: the blockdev's.
478 : : *
479 : : * address_spaces which do not place buffers at ->private_list via these
480 : : * utility functions are free to use private_lock and private_list for
481 : : * whatever they want. The only requirement is that list_empty(private_list)
482 : : * be true at clear_inode() time.
483 : : *
484 : : * FIXME: clear_inode should not call invalidate_inode_buffers(). The
485 : : * filesystems should do that. invalidate_inode_buffers() should just go
486 : : * BUG_ON(!list_empty).
487 : : *
488 : : * FIXME: mark_buffer_dirty_inode() is a data-plane operation. It should
489 : : * take an address_space, not an inode. And it should be called
490 : : * mark_buffer_dirty_fsync() to clearly define why those buffers are being
491 : : * queued up.
492 : : *
493 : : * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
494 : : * list if it is already on a list. Because if the buffer is on a list,
495 : : * it *must* already be on the right one. If not, the filesystem is being
496 : : * silly. This will save a ton of locking. But first we have to ensure
497 : : * that buffers are taken *off* the old inode's list when they are freed
498 : : * (presumably in truncate). That requires careful auditing of all
499 : : * filesystems (do it inside bforget()). It could also be done by bringing
500 : : * b_inode back.
501 : : */
502 : :
503 : : /*
504 : : * The buffer's backing address_space's private_lock must be held
505 : : */
506 : 0 : static void __remove_assoc_queue(struct buffer_head *bh)
507 : : {
508 : 0 : list_del_init(&bh->b_assoc_buffers);
509 [ # # ]: 0 : WARN_ON(!bh->b_assoc_map);
510 [ # # ]: 0 : if (buffer_write_io_error(bh))
511 : 0 : set_bit(AS_EIO, &bh->b_assoc_map->flags);
512 : 0 : bh->b_assoc_map = NULL;
513 : 0 : }
514 : :
515 : 0 : int inode_has_buffers(struct inode *inode)
516 : : {
517 : 3665625 : return !list_empty(&inode->i_data.private_list);
518 : : }
519 : :
520 : : /*
521 : : * osync is designed to support O_SYNC io. It waits synchronously for
522 : : * all already-submitted IO to complete, but does not queue any new
523 : : * writes to the disk.
524 : : *
525 : : * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
526 : : * you dirty the buffers, and then use osync_inode_buffers to wait for
527 : : * completion. Any other dirty buffers which are not yet queued for
528 : : * write will not be flushed to disk by the osync.
529 : : */
530 : 0 : static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
531 : : {
532 : : struct buffer_head *bh;
533 : : struct list_head *p;
534 : : int err = 0;
535 : :
536 : : spin_lock(lock);
537 : : repeat:
538 [ # # ]: 0 : list_for_each_prev(p, list) {
539 : 0 : bh = BH_ENTRY(p);
540 [ # # ]: 0 : if (buffer_locked(bh)) {
541 : : get_bh(bh);
542 : : spin_unlock(lock);
543 : : wait_on_buffer(bh);
544 [ # # ]: 0 : if (!buffer_uptodate(bh))
545 : : err = -EIO;
546 : : brelse(bh);
547 : : spin_lock(lock);
548 : : goto repeat;
549 : : }
550 : : }
551 : : spin_unlock(lock);
552 : 0 : return err;
553 : : }
554 : :
555 : 0 : static void do_thaw_one(struct super_block *sb, void *unused)
556 : : {
557 : : char b[BDEVNAME_SIZE];
558 [ # # ][ # # ]: 0 : while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
559 : 0 : printk(KERN_WARNING "Emergency Thaw on %s\n",
560 : : bdevname(sb->s_bdev, b));
561 : 0 : }
562 : :
563 : 0 : static void do_thaw_all(struct work_struct *work)
564 : : {
565 : 0 : iterate_supers(do_thaw_one, NULL);
566 : 0 : kfree(work);
567 : 0 : printk(KERN_WARNING "Emergency Thaw complete\n");
568 : 0 : }
569 : :
570 : : /**
571 : : * emergency_thaw_all -- forcibly thaw every frozen filesystem
572 : : *
573 : : * Used for emergency unfreeze of all filesystems via SysRq
574 : : */
575 : 0 : void emergency_thaw_all(void)
576 : : {
577 : : struct work_struct *work;
578 : :
579 : : work = kmalloc(sizeof(*work), GFP_ATOMIC);
580 [ # # ]: 0 : if (work) {
581 : 0 : INIT_WORK(work, do_thaw_all);
582 : : schedule_work(work);
583 : : }
584 : 0 : }
585 : :
586 : : /**
587 : : * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
588 : : * @mapping: the mapping which wants those buffers written
589 : : *
590 : : * Starts I/O against the buffers at mapping->private_list, and waits upon
591 : : * that I/O.
592 : : *
593 : : * Basically, this is a convenience function for fsync().
594 : : * @mapping is a file or directory which needs those buffers to be written for
595 : : * a successful fsync().
596 : : */
597 : 0 : int sync_mapping_buffers(struct address_space *mapping)
598 : : {
599 : 1 : struct address_space *buffer_mapping = mapping->private_data;
600 : :
601 [ - + ][ # # ]: 1 : if (buffer_mapping == NULL || list_empty(&mapping->private_list))
602 : : return 0;
603 : :
604 : 0 : return fsync_buffers_list(&buffer_mapping->private_lock,
605 : : &mapping->private_list);
606 : : }
607 : : EXPORT_SYMBOL(sync_mapping_buffers);
608 : :
609 : : /*
610 : : * Called when we've recently written block `bblock', and it is known that
611 : : * `bblock' was for a buffer_boundary() buffer. This means that the block at
612 : : * `bblock + 1' is probably a dirty indirect block. Hunt it down and, if it's
613 : : * dirty, schedule it for IO. So that indirects merge nicely with their data.
614 : : */
615 : 0 : void write_boundary_block(struct block_device *bdev,
616 : : sector_t bblock, unsigned blocksize)
617 : : {
618 : 0 : struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
619 [ # # ]: 0 : if (bh) {
620 [ # # ]: 0 : if (buffer_dirty(bh))
621 : 0 : ll_rw_block(WRITE, 1, &bh);
622 : 0 : put_bh(bh);
623 : : }
624 : 0 : }
625 : :
626 : 0 : void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
627 : : {
628 : 0 : struct address_space *mapping = inode->i_mapping;
629 : 0 : struct address_space *buffer_mapping = bh->b_page->mapping;
630 : :
631 : 0 : mark_buffer_dirty(bh);
632 [ # # ]: 0 : if (!mapping->private_data) {
633 : 0 : mapping->private_data = buffer_mapping;
634 : : } else {
635 [ # # ]: 0 : BUG_ON(mapping->private_data != buffer_mapping);
636 : : }
637 [ # # ]: 0 : if (!bh->b_assoc_map) {
638 : : spin_lock(&buffer_mapping->private_lock);
639 : 0 : list_move_tail(&bh->b_assoc_buffers,
640 : : &mapping->private_list);
641 : 0 : bh->b_assoc_map = mapping;
642 : : spin_unlock(&buffer_mapping->private_lock);
643 : : }
644 : 0 : }
645 : : EXPORT_SYMBOL(mark_buffer_dirty_inode);
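
A hedged sketch of how an ext2-style filesystem would use the association helpers described in the long comment above: mark_buffer_dirty_inode() queues a metadata buffer on the inode's ->private_list, and sync_mapping_buffers() writes and waits on that list at fsync() time. The example_* names are illustrative, not part of this file, and a real ->fsync would normally flush the data pages first (e.g. via filemap_write_and_wait_range()):

static void example_dirty_indirect(struct inode *inode, struct buffer_head *bh)
{
	/* queue the indirect block on inode->i_mapping->private_list */
	mark_buffer_dirty_inode(bh, inode);
}

static int example_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_mapping->host;

	/* write out and wait upon the associated metadata buffers */
	return sync_mapping_buffers(inode->i_mapping);
}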
646 : :
647 : : /*
648 : : * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
649 : : * dirty.
650 : : *
651 : : * If warn is true, then emit a warning if the page is not uptodate and has
652 : : * not been truncated.
653 : : */
654 : 0 : static void __set_page_dirty(struct page *page,
655 : : struct address_space *mapping, int warn)
656 : : {
657 : : unsigned long flags;
658 : :
659 : 2072754 : spin_lock_irqsave(&mapping->tree_lock, flags);
660 [ + ]: 2074364 : if (page->mapping) { /* Race with truncate? */
661 [ - + ][ - + ]: 2074731 : WARN_ON_ONCE(warn && !PageUptodate(page));
[ - + ][ # # ]
[ # # ]
662 : 2074731 : account_page_dirtied(page, mapping);
663 : 2073609 : radix_tree_tag_set(&mapping->page_tree,
664 : : page_index(page), PAGECACHE_TAG_DIRTY);
665 : : }
666 : : spin_unlock_irqrestore(&mapping->tree_lock, flags);
667 : 2075517 : __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
668 : 2073420 : }
669 : :
670 : : /*
671 : : * Add a page to the dirty page list.
672 : : *
673 : : * It is a sad fact of life that this function is called from several places
674 : : * deeply under spinlocking. It may not sleep.
675 : : *
676 : : * If the page has buffers, the uptodate buffers are set dirty, to preserve
677 : : * dirty-state coherency between the page and the buffers. If the page does
678 : : * not have buffers then when they are later attached they will all be set
679 : : * dirty.
680 : : *
681 : : * The buffers are dirtied before the page is dirtied. There's a small race
682 : : * window in which a writepage caller may see the page cleanness but not the
683 : : * buffer dirtiness. That's fine. If this code were to set the page dirty
684 : : * before the buffers, a concurrent writepage caller could clear the page dirty
685 : : * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
686 : : * page on the dirty page list.
687 : : *
688 : : * We use private_lock to lock against try_to_free_buffers while using the
689 : : * page's buffer list. Also use this to protect against clean buffers being
690 : : * added to the page after it was set dirty.
691 : : *
692 : : * FIXME: may need to call ->reservepage here as well. That's rather up to the
693 : : * address_space though.
694 : : */
695 : 0 : int __set_page_dirty_buffers(struct page *page)
696 : : {
697 : : int newly_dirty;
698 : 616008 : struct address_space *mapping = page_mapping(page);
699 : :
700 [ - + ]: 616513 : if (unlikely(!mapping))
701 : 0 : return !TestSetPageDirty(page);
702 : :
703 : : spin_lock(&mapping->private_lock);
704 [ + - ]: 616797 : if (page_has_buffers(page)) {
705 [ - + ]: 616797 : struct buffer_head *head = page_buffers(page);
706 : : struct buffer_head *bh = head;
707 : :
708 : : do {
709 : : set_buffer_dirty(bh);
710 : 616797 : bh = bh->b_this_page;
711 [ - + ]: 616797 : } while (bh != head);
712 : : }
713 : 616797 : newly_dirty = !TestSetPageDirty(page);
714 : : spin_unlock(&mapping->private_lock);
715 : :
716 [ - + ]: 616797 : if (newly_dirty)
717 : 0 : __set_page_dirty(page, mapping, 1);
718 : 616797 : return newly_dirty;
719 : : }
720 : : EXPORT_SYMBOL(__set_page_dirty_buffers);
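
Illustrative only: a buffer-backed filesystem typically wires the helper above straight into its address_space_operations (example_aops is a hypothetical name, not something defined in this file):

static const struct address_space_operations example_aops = {
	.set_page_dirty	= __set_page_dirty_buffers,
	/* ... readpage, writepage, etc. ... */
};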
721 : :
722 : : /*
723 : : * Write out and wait upon a list of buffers.
724 : : *
725 : : * We have conflicting pressures: we want to make sure that all
726 : : * initially dirty buffers get waited on, but that any subsequently
727 : : * dirtied buffers don't. After all, we don't want fsync to last
728 : : * forever if somebody is actively writing to the file.
729 : : *
730 : : * Do this in two main stages: first we copy dirty buffers to a
731 : : * temporary inode list, queueing the writes as we go. Then we clean
732 : : * up, waiting for those writes to complete.
733 : : *
734 : : * During this second stage, any subsequent updates to the file may end
735 : : * up refiling the buffer on the original inode's dirty list again, so
736 : : * there is a chance we will end up with a buffer queued for write but
737 : : * not yet completed on that list. So, as a final cleanup we go through
738 : : * the osync code to catch these locked, dirty buffers without requeuing
739 : : * any newly dirty buffers for write.
740 : : */
741 : 0 : static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
742 : : {
743 : : struct buffer_head *bh;
744 : : struct list_head tmp;
745 : : struct address_space *mapping;
746 : : int err = 0, err2;
747 : : struct blk_plug plug;
748 : :
749 : : INIT_LIST_HEAD(&tmp);
750 : 0 : blk_start_plug(&plug);
751 : :
752 : : spin_lock(lock);
753 [ # # ]: 0 : while (!list_empty(list)) {
754 : 0 : bh = BH_ENTRY(list->next);
755 : 0 : mapping = bh->b_assoc_map;
756 : 0 : __remove_assoc_queue(bh);
757 : : /* Avoid race with mark_buffer_dirty_inode() which does
758 : : * a lockless check and we rely on seeing the dirty bit */
759 : 0 : smp_mb();
760 [ # # ][ # # ]: 0 : if (buffer_dirty(bh) || buffer_locked(bh)) {
761 : 0 : list_add(&bh->b_assoc_buffers, &tmp);
762 : 0 : bh->b_assoc_map = mapping;
763 [ # # ]: 0 : if (buffer_dirty(bh)) {
764 : : get_bh(bh);
765 : : spin_unlock(lock);
766 : : /*
767 : : * Ensure any pending I/O completes so that
768 : : * write_dirty_buffer() actually writes the
769 : : * current contents - it is a noop if I/O is
770 : : * still in flight on potentially older
771 : : * contents.
772 : : */
773 : 0 : write_dirty_buffer(bh, WRITE_SYNC);
774 : :
775 : : /*
776 : : * Kick off IO for the previous mapping. Note
777 : : * that we will not run the very last mapping,
778 : : * wait_on_buffer() will do that for us
779 : : * through sync_buffer().
780 : : */
781 : : brelse(bh);
782 : : spin_lock(lock);
783 : : }
784 : : }
785 : : }
786 : :
787 : : spin_unlock(lock);
788 : 0 : blk_finish_plug(&plug);
789 : : spin_lock(lock);
790 : :
791 [ # # ]: 0 : while (!list_empty(&tmp)) {
792 : 0 : bh = BH_ENTRY(tmp.prev);
793 : : get_bh(bh);
794 : 0 : mapping = bh->b_assoc_map;
795 : 0 : __remove_assoc_queue(bh);
796 : : /* Avoid race with mark_buffer_dirty_inode() which does
797 : : * a lockless check and we rely on seeing the dirty bit */
798 : 0 : smp_mb();
799 [ # # ]: 0 : if (buffer_dirty(bh)) {
800 : 0 : list_add(&bh->b_assoc_buffers,
801 : : &mapping->private_list);
802 : 0 : bh->b_assoc_map = mapping;
803 : : }
804 : : spin_unlock(lock);
805 : : wait_on_buffer(bh);
806 [ # # ]: 0 : if (!buffer_uptodate(bh))
807 : : err = -EIO;
808 : : brelse(bh);
809 : : spin_lock(lock);
810 : : }
811 : :
812 : : spin_unlock(lock);
813 : 0 : err2 = osync_buffers_list(lock, list);
814 [ # # ]: 0 : if (err)
815 : : return err;
816 : : else
817 : 0 : return err2;
818 : : }
819 : :
820 : : /*
821 : : * Invalidate any and all dirty buffers on a given inode. We are
822 : : * probably unmounting the fs, but that doesn't mean we have already
823 : : * done a sync(). Just drop the buffers from the inode list.
824 : : *
825 : : * NOTE: we take the inode's blockdev's mapping's private_lock. Which
826 : : * assumes that all the buffers are against the blockdev. Not true
827 : : * for reiserfs.
828 : : */
829 : 0 : void invalidate_inode_buffers(struct inode *inode)
830 : : {
831 [ - + ]: 275073 : if (inode_has_buffers(inode)) {
832 : : struct address_space *mapping = &inode->i_data;
833 : : struct list_head *list = &mapping->private_list;
834 : 0 : struct address_space *buffer_mapping = mapping->private_data;
835 : :
836 : : spin_lock(&buffer_mapping->private_lock);
837 [ # # ]: 0 : while (!list_empty(list))
838 : 0 : __remove_assoc_queue(BH_ENTRY(list->next));
839 : : spin_unlock(&buffer_mapping->private_lock);
840 : : }
841 : 0 : }
842 : : EXPORT_SYMBOL(invalidate_inode_buffers);
843 : :
844 : : /*
845 : : * Remove any clean buffers from the inode's buffer list. This is called
846 : : * when we're trying to free the inode itself. Those buffers can pin it.
847 : : *
848 : : * Returns true if all buffers were removed.
849 : : */
850 : 0 : int remove_inode_buffers(struct inode *inode)
851 : : {
852 : : int ret = 1;
853 : :
854 [ - + ]: 160 : if (inode_has_buffers(inode)) {
855 : : struct address_space *mapping = &inode->i_data;
856 : : struct list_head *list = &mapping->private_list;
857 : 0 : struct address_space *buffer_mapping = mapping->private_data;
858 : :
859 : : spin_lock(&buffer_mapping->private_lock);
860 [ # # ]: 0 : while (!list_empty(list)) {
861 : 0 : struct buffer_head *bh = BH_ENTRY(list->next);
862 [ # # ]: 0 : if (buffer_dirty(bh)) {
863 : : ret = 0;
864 : : break;
865 : : }
866 : 0 : __remove_assoc_queue(bh);
867 : : }
868 : : spin_unlock(&buffer_mapping->private_lock);
869 : : }
870 : 160 : return ret;
871 : : }
872 : :
873 : : /*
874 : : * Create the appropriate buffers when given a page for data area and
875 : : * the size of each buffer.. Use the bh->b_this_page linked list to
876 : : * follow the buffers created. Return NULL if unable to create more
877 : : * buffers.
878 : : *
879 : : * The retry flag is used to differentiate async IO (paging, swapping)
880 : : * which may not fail from ordinary buffer allocations.
881 : : */
882 : 1827127 : struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
883 : : int retry)
884 : : {
885 : : struct buffer_head *bh, *head;
886 : : long offset;
887 : :
888 : : try_again:
889 : : head = NULL;
890 : : offset = PAGE_SIZE;
891 [ + + ]: 3657244 : while ((offset -= size) >= 0) {
892 : 1828312 : bh = alloc_buffer_head(GFP_NOFS);
893 [ + - ]: 1830817 : if (!bh)
894 : : goto no_grow;
895 : :
896 : 1830817 : bh->b_this_page = head;
897 : 1830817 : bh->b_blocknr = -1;
898 : : head = bh;
899 : :
900 : 1830817 : bh->b_size = size;
901 : :
902 : : /* Link the buffer to its page */
903 : 1830817 : set_bh_page(bh, page, offset);
904 : : }
905 : : return head;
906 : : /*
907 : : * In case anything failed, we just free everything we got.
908 : : */
909 : : no_grow:
910 [ # # ]: 0 : if (head) {
911 : : do {
912 : : bh = head;
913 : 0 : head = head->b_this_page;
914 : 0 : free_buffer_head(bh);
915 [ # # ]: 0 : } while (head);
916 : : }
917 : :
918 : : /*
919 : : * Return failure for non-async IO requests. Async IO requests
920 : : * are not allowed to fail, so we have to wait until buffer heads
921 : : * become available. But we don't want tasks sleeping with
922 : : * partially complete buffers, so all were released above.
923 : : */
924 [ # # ]: 0 : if (!retry)
925 : : return NULL;
926 : :
927 : : /* We're _really_ low on memory. Now we just
928 : : * wait for old buffer heads to become free due to
929 : : * finishing IO. Since this is an async request and
930 : : * the reserve list is empty, we're sure there are
931 : : * async buffer heads in use.
932 : : */
933 : 0 : free_more_memory();
934 : 0 : goto try_again;
935 : : }
936 : : EXPORT_SYMBOL_GPL(alloc_page_buffers);
937 : :
938 : : static inline void
939 : : link_dev_buffers(struct page *page, struct buffer_head *head)
940 : : {
941 : : struct buffer_head *bh, *tail;
942 : :
943 : : bh = head;
944 : : do {
945 : : tail = bh;
946 : 55318 : bh = bh->b_this_page;
947 [ + + ]: 55318 : } while (bh);
948 : 55114 : tail->b_this_page = head;
949 : : attach_page_buffers(page, head);
950 : : }
951 : :
952 : 0 : static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
953 : : {
954 : : sector_t retval = ~((sector_t)0);
955 : 55118 : loff_t sz = i_size_read(bdev->bd_inode);
956 : :
957 [ + ]: 55118 : if (sz) {
958 : : unsigned int sizebits = blksize_bits(size);
959 : 55118 : retval = (sz >> sizebits);
960 : : }
961 : 0 : return retval;
962 : : }
963 : :
964 : : /*
965 : : * Initialise the state of a blockdev page's buffers.
966 : : */
967 : : static sector_t
968 : 0 : init_page_buffers(struct page *page, struct block_device *bdev,
969 : : sector_t block, int size)
970 : : {
971 [ - + ]: 55118 : struct buffer_head *head = page_buffers(page);
972 : : struct buffer_head *bh = head;
973 : : int uptodate = PageUptodate(page);
974 : 55118 : sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
975 : :
976 : : do {
977 [ + + ]: 110440 : if (!buffer_mapped(bh)) {
978 : : init_buffer(bh, NULL, NULL);
979 : 55318 : bh->b_bdev = bdev;
980 : 55318 : bh->b_blocknr = block;
981 [ - + ]: 55318 : if (uptodate)
982 : : set_buffer_uptodate(bh);
983 [ + - ]: 55318 : if (block < end_block)
984 : : set_buffer_mapped(bh);
985 : : }
986 : 55322 : block++;
987 : 55322 : bh = bh->b_this_page;
988 [ + + ]: 55322 : } while (bh != head);
989 : :
990 : : /*
991 : : * Caller needs to validate requested block against end of device.
992 : : */
993 : 55118 : return end_block;
994 : : }
995 : :
996 : : /*
997 : : * Create the page-cache page that contains the requested block.
998 : : *
999 : : * This is used purely for blockdev mappings.
1000 : : */
1001 : : static int
1002 : 0 : grow_dev_page(struct block_device *bdev, sector_t block,
1003 : : pgoff_t index, int size, int sizebits)
1004 : : {
1005 : 55102 : struct inode *inode = bdev->bd_inode;
1006 : : struct page *page;
1007 : : struct buffer_head *bh;
1008 : : sector_t end_block;
1009 : : int ret = 0; /* Will call free_more_memory() */
1010 : : gfp_t gfp_mask;
1011 : :
1012 : 110204 : gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
1013 : : gfp_mask |= __GFP_MOVABLE;
1014 : : /*
1015 : : * XXX: __getblk_slow() can not really deal with failure and
1016 : : * will endlessly loop on improvised global reclaim. Prefer
1017 : : * looping in the allocator rather than here, at least that
1018 : : * code knows what it's doing.
1019 : : */
1020 : 55102 : gfp_mask |= __GFP_NOFAIL;
1021 : :
1022 : 55102 : page = find_or_create_page(inode->i_mapping, index, gfp_mask);
1023 [ + - ]: 55118 : if (!page)
1024 : : return ret;
1025 : :
1026 [ - + ]: 55118 : BUG_ON(!PageLocked(page));
1027 : :
1028 [ + + ]: 55118 : if (page_has_buffers(page)) {
1029 [ - + ]: 4 : bh = page_buffers(page);
1030 [ + - ]: 4 : if (bh->b_size == size) {
1031 : 4 : end_block = init_page_buffers(page, bdev,
1032 : 4 : index << sizebits, size);
1033 : 4 : goto done;
1034 : : }
1035 [ # # ]: 0 : if (!try_to_free_buffers(page))
1036 : : goto failed;
1037 : : }
1038 : :
1039 : : /*
1040 : : * Allocate some buffers for this page
1041 : : */
1042 : 55114 : bh = alloc_page_buffers(page, size, 0);
1043 [ + - ]: 55075 : if (!bh)
1044 : : goto failed;
1045 : :
1046 : : /*
1047 : : * Link the page to the buffers and initialise them. Take the
1048 : : * lock to be atomic wrt __find_get_block(), which does not
1049 : : * run under the page lock.
1050 : : */
1051 : 55075 : spin_lock(&inode->i_mapping->private_lock);
1052 : : link_dev_buffers(page, bh);
1053 : 55114 : end_block = init_page_buffers(page, bdev, index << sizebits, size);
1054 : 55114 : spin_unlock(&inode->i_mapping->private_lock);
1055 : : done:
1056 [ - + ]: 55118 : ret = (block < end_block) ? 1 : -ENXIO;
1057 : : failed:
1058 : 55118 : unlock_page(page);
1059 : 55118 : page_cache_release(page);
1060 : 55118 : return ret;
1061 : : }
1062 : :
1063 : : /*
1064 : : * Create buffers for the specified block device block's page. If
1065 : : * that page was dirty, the buffers are set dirty also.
1066 : : */
1067 : : static int
1068 : 55096 : grow_buffers(struct block_device *bdev, sector_t block, int size)
1069 : : {
1070 : : pgoff_t index;
1071 : : int sizebits;
1072 : :
1073 : : sizebits = -1;
1074 : : do {
1075 : 55232 : sizebits++;
1076 [ + + ]: 55232 : } while ((size << sizebits) < PAGE_SIZE);
1077 : :
1078 : 55096 : index = block >> sizebits;
1079 : :
1080 : : /*
1081 : : * Check for a block which wants to lie outside our maximum possible
1082 : : * pagecache index. (this comparison is done using sector_t types).
1083 : : */
1084 [ - + ]: 55096 : if (unlikely(index != block >> sizebits)) {
1085 : : char b[BDEVNAME_SIZE];
1086 : :
1087 : 0 : printk(KERN_ERR "%s: requested out-of-range block %llu for "
1088 : : "device %s\n",
1089 : : __func__, (unsigned long long)block,
1090 : : bdevname(bdev, b));
1091 : : return -EIO;
1092 : : }
1093 : :
1094 : : /* Create a page with the proper size buffers.. */
1095 : 55096 : return grow_dev_page(bdev, block, index, size, sizebits);
1096 : : }
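
A worked example of the index calculation above, assuming 4096-byte pages: for size = 1024 the loop stops at sizebits = 2, so block 4097 maps to pagecache index 4097 >> 2 = 1024 and becomes the second of the four buffers on that page (4097 & 3 == 1).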
1097 : :
1098 : : static struct buffer_head *
1099 : 0 : __getblk_slow(struct block_device *bdev, sector_t block, int size)
1100 : : {
1101 : : /* Size must be multiple of hard sectorsize */
1102 [ + ][ + + ]: 55063 : if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1103 : : (size < 512 || size > PAGE_SIZE))) {
1104 : 58 : printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1105 : : size);
1106 : 0 : printk(KERN_ERR "logical block size: %d\n",
1107 : : bdev_logical_block_size(bdev));
1108 : :
1109 : 55007 : dump_stack();
1110 : 0 : return NULL;
1111 : : }
1112 : :
1113 : : for (;;) {
1114 : : struct buffer_head *bh;
1115 : : int ret;
1116 : :
1117 : 110125 : bh = __find_get_block(bdev, block, size);
1118 [ + + ]: 110223 : if (bh)
1119 : : return bh;
1120 : :
1121 : 55111 : ret = grow_buffers(bdev, block, size);
1122 [ + - ]: 55118 : if (ret < 0)
1123 : : return NULL;
1124 [ + - ]: 55118 : if (ret == 0)
1125 : 0 : free_more_memory();
1126 : : }
1127 : : }
1128 : :
1129 : : /*
1130 : : * The relationship between dirty buffers and dirty pages:
1131 : : *
1132 : : * Whenever a page has any dirty buffers, the page's dirty bit is set, and
1133 : : * the page is tagged dirty in its radix tree.
1134 : : *
1135 : : * At all times, the dirtiness of the buffers represents the dirtiness of
1136 : : * subsections of the page. If the page has buffers, the page dirty bit is
1137 : : * merely a hint about the true dirty state.
1138 : : *
1139 : : * When a page is set dirty in its entirety, all its buffers are marked dirty
1140 : : * (if the page has buffers).
1141 : : *
1142 : : * When a buffer is marked dirty, its page is dirtied, but the page's other
1143 : : * buffers are not.
1144 : : *
1145 : : * Also. When blockdev buffers are explicitly read with bread(), they
1146 : : * individually become uptodate. But their backing page remains not
1147 : : * uptodate - even if all of its buffers are uptodate. A subsequent
1148 : : * block_read_full_page() against that page will discover all the uptodate
1149 : : * buffers, will set the page uptodate and will perform no I/O.
1150 : : */
1151 : :
1152 : : /**
1153 : : * mark_buffer_dirty - mark a buffer_head as needing writeout
1154 : : * @bh: the buffer_head to mark dirty
1155 : : *
1156 : : * mark_buffer_dirty() will set the dirty bit against the buffer, then set its
1157 : : * backing page dirty, then tag the page as dirty in its address_space's radix
1158 : : * tree and then attach the address_space's inode to its superblock's dirty
1159 : : * inode list.
1160 : : *
1161 : : * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
1162 : : * mapping->tree_lock and mapping->host->i_lock.
1163 : : */
1164 : 0 : void mark_buffer_dirty(struct buffer_head *bh)
1165 : : {
1166 [ - + ][ # # ]: 6671904 : WARN_ON_ONCE(!buffer_uptodate(bh));
[ - + ]
1167 : :
1168 : : trace_block_dirty_buffer(bh);
1169 : :
1170 : : /*
1171 : : * Very *carefully* optimize the it-is-already-dirty case.
1172 : : *
1173 : : * Don't let the final "is it dirty" escape to before we
1174 : : * perhaps modified the buffer.
1175 : : */
1176 [ + + ]: 6673515 : if (buffer_dirty(bh)) {
1177 : 4573482 : smp_mb();
1178 [ - + ]: 4572134 : if (buffer_dirty(bh))
1179 : 6672006 : return;
1180 : : }
1181 : :
1182 [ + + ]: 2100094 : if (!test_set_buffer_dirty(bh)) {
1183 : 2099069 : struct page *page = bh->b_page;
1184 [ + + ]: 2099699 : if (!TestSetPageDirty(page)) {
1185 : 2074314 : struct address_space *mapping = page_mapping(page);
1186 [ + ]: 2073197 : if (mapping)
1187 : 2073536 : __set_page_dirty(page, mapping, 0);
1188 : : }
1189 : : }
1190 : : }
1191 : : EXPORT_SYMBOL(mark_buffer_dirty);
1192 : :
1193 : : /*
1194 : : * Decrement a buffer_head's reference count. If all buffers against a page
1195 : : * have zero reference count, are clean and unlocked, and if the page is clean
1196 : : * and unlocked then try_to_free_buffers() may strip the buffers from the page
1197 : : * in preparation for freeing it (sometimes, rarely, buffers are removed from
1198 : : * a page but it ends up not being freed, and buffers may later be reattached).
1199 : : */
1200 : 0 : void __brelse(struct buffer_head * buf)
1201 : : {
1202 [ + - ]: 11273059 : if (atomic_read(&buf->b_count)) {
1203 : : put_bh(buf);
1204 : 11276305 : return;
1205 : : }
1206 : 0 : WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1207 : : }
1208 : : EXPORT_SYMBOL(__brelse);
1209 : :
1210 : : /*
1211 : : * bforget() is like brelse(), except it discards any
1212 : : * potentially dirty data.
1213 : : */
1214 : 0 : void __bforget(struct buffer_head *bh)
1215 : : {
1216 : : clear_buffer_dirty(bh);
1217 [ - + ]: 34984 : if (bh->b_assoc_map) {
1218 : 0 : struct address_space *buffer_mapping = bh->b_page->mapping;
1219 : :
1220 : : spin_lock(&buffer_mapping->private_lock);
1221 : 0 : list_del_init(&bh->b_assoc_buffers);
1222 : 0 : bh->b_assoc_map = NULL;
1223 : : spin_unlock(&buffer_mapping->private_lock);
1224 : : }
1225 : 34984 : __brelse(bh);
1226 : 34984 : }
1227 : : EXPORT_SYMBOL(__bforget);
1228 : :
1229 : 0 : static struct buffer_head *__bread_slow(struct buffer_head *bh)
1230 : : {
1231 : : lock_buffer(bh);
1232 [ - + ]: 88 : if (buffer_uptodate(bh)) {
1233 : 0 : unlock_buffer(bh);
1234 : 0 : return bh;
1235 : : } else {
1236 : : get_bh(bh);
1237 : 88 : bh->b_end_io = end_buffer_read_sync;
1238 : : submit_bh(READ, bh);
1239 : : wait_on_buffer(bh);
1240 [ - + ]: 88 : if (buffer_uptodate(bh))
1241 : : return bh;
1242 : : }
1243 : : brelse(bh);
1244 : : return NULL;
1245 : : }
1246 : :
1247 : : /*
1248 : : * Per-cpu buffer LRU implementation. To reduce the cost of __find_get_block().
1249 : : * The bhs[] array is sorted - newest buffer is at bhs[0]. Buffers have their
1250 : : * refcount elevated by one when they're in an LRU. A buffer can only appear
1251 : : * once in a particular CPU's LRU. A single buffer can be present in multiple
1252 : : * CPU's LRUs at the same time.
1253 : : *
1254 : : * This is a transparent caching front-end to sb_bread(), sb_getblk() and
1255 : : * sb_find_get_block().
1256 : : *
1257 : : * The LRUs themselves only need locking against invalidate_bh_lrus. We use
1258 : : * a local interrupt disable for that.
1259 : : */
1260 : :
1261 : : #define BH_LRU_SIZE 8
1262 : :
1263 : : struct bh_lru {
1264 : : struct buffer_head *bhs[BH_LRU_SIZE];
1265 : : };
1266 : :
1267 : : static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1268 : :
1269 : : #ifdef CONFIG_SMP
1270 : : #define bh_lru_lock() local_irq_disable()
1271 : : #define bh_lru_unlock() local_irq_enable()
1272 : : #else
1273 : : #define bh_lru_lock() preempt_disable()
1274 : : #define bh_lru_unlock() preempt_enable()
1275 : : #endif
1276 : :
1277 : : static inline void check_irqs_on(void)
1278 : : {
1279 : : #ifdef irqs_disabled
1280 [ - + ][ - + ]: 11222505 : BUG_ON(irqs_disabled());
1281 : : #endif
1282 : : }
1283 : :
1284 : : /*
1285 : : * The LRU management algorithm is dopey-but-simple. Sorry.
1286 : : */
1287 : 0 : static void bh_lru_install(struct buffer_head *bh)
1288 : : {
1289 : : struct buffer_head *evictee = NULL;
1290 : :
1291 : : check_irqs_on();
1292 : : bh_lru_lock();
1293 [ + + ]: 236781 : if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
1294 : : struct buffer_head *bhs[BH_LRU_SIZE];
1295 : : int in;
1296 : : int out = 0;
1297 : :
1298 : : get_bh(bh);
1299 : 473526 : bhs[out++] = bh;
1300 [ + + ]: 2367204 : for (in = 0; in < BH_LRU_SIZE; in++) {
1301 : : struct buffer_head *bh2 =
1302 : 3787360 : __this_cpu_read(bh_lrus.bhs[in]);
1303 : :
1304 [ + ]: 1893680 : if (bh2 == bh) {
1305 : 0 : __brelse(bh2);
1306 : : } else {
1307 [ + + ]: 1893826 : if (out >= BH_LRU_SIZE) {
1308 [ - + ]: 236773 : BUG_ON(evictee != NULL);
1309 : : evictee = bh2;
1310 : : } else {
1311 : 1657053 : bhs[out++] = bh2;
1312 : : }
1313 : : }
1314 : : }
1315 [ - + ]: 236755 : while (out < BH_LRU_SIZE)
1316 : 0 : bhs[out++] = NULL;
1317 : 236755 : memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1318 : : }
1319 : : bh_lru_unlock();
1320 : :
1321 [ + + ]: 236759 : if (evictee)
1322 : 235543 : __brelse(evictee);
1323 : 236733 : }
1324 : :
1325 : : /*
1326 : : * Look up the bh in this cpu's LRU. If it's there, move it to the head.
1327 : : */
1328 : : static struct buffer_head *
1329 : 0 : lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1330 : : {
1331 : : struct buffer_head *ret = NULL;
1332 : : unsigned int i;
1333 : :
1334 : : check_irqs_on();
1335 : : bh_lru_lock();
1336 [ + + ]: 21725768 : for (i = 0; i < BH_LRU_SIZE; i++) {
1337 : 42703868 : struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1338 : :
1339 [ + ][ + ]: 21351934 : if (bh && bh->b_bdev == bdev &&
[ + + ]
1340 [ + + ]: 10634021 : bh->b_blocknr == block && bh->b_size == size) {
1341 [ + + ]: 10608490 : if (i) {
1342 [ + + ]: 11756682 : while (i) {
1343 : 7938796 : __this_cpu_write(bh_lrus.bhs[i],
1344 : : __this_cpu_read(bh_lrus.bhs[i - 1]));
1345 : : i--;
1346 : : }
1347 : 3817886 : __this_cpu_write(bh_lrus.bhs[0], bh);
1348 : : }
1349 : : get_bh(bh);
1350 : : ret = bh;
1351 : 10633806 : break;
1352 : : }
1353 : : }
1354 : : bh_lru_unlock();
1355 : 10985106 : return ret;
1356 : : }
1357 : :
1358 : : /*
1359 : : * Perform a pagecache lookup for the matching buffer. If it's there, refresh
1360 : : * it in the LRU and mark it as accessed. If it is not present then return
1361 : : * NULL
1362 : : */
1363 : : struct buffer_head *
1364 : 0 : __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1365 : : {
1366 : 10977181 : struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1367 : :
1368 [ + + ]: 10986289 : if (bh == NULL) {
1369 : 351850 : bh = __find_get_block_slow(bdev, block);
1370 [ + + ]: 351844 : if (bh)
1371 : 236771 : bh_lru_install(bh);
1372 : : }
1373 [ + + ]: 10982540 : if (bh)
1374 : : touch_buffer(bh);
1375 : 10983041 : return bh;
1376 : : }
1377 : : EXPORT_SYMBOL(__find_get_block);
1378 : :
1379 : : /*
1380 : : * __getblk will locate (and, if necessary, create) the buffer_head
1381 : : * which corresponds to the passed block_device, block and size. The
1382 : : * returned buffer has its reference count incremented.
1383 : : *
1384 : : * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
1385 : : * attempt is failing. FIXME, perhaps?
1386 : : */
1387 : : struct buffer_head *
1388 : 0 : __getblk(struct block_device *bdev, sector_t block, unsigned size)
1389 : : {
1390 : 10768395 : struct buffer_head *bh = __find_get_block(bdev, block, size);
1391 : :
1392 : : might_sleep();
1393 [ + + ]: 10774516 : if (bh == NULL)
1394 : 55054 : bh = __getblk_slow(bdev, block, size);
1395 : 6164 : return bh;
1396 : : }
1397 : : EXPORT_SYMBOL(__getblk);
1398 : :
1399 : : /*
1400 : : * Do async read-ahead on a buffer..
1401 : : */
1402 : 0 : void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1403 : : {
1404 : 6488 : struct buffer_head *bh = __getblk(bdev, block, size);
1405 [ + - ]: 6488 : if (likely(bh)) {
1406 : 6488 : ll_rw_block(READA, 1, &bh);
1407 : 6488 : brelse(bh);
1408 : : }
1409 : 0 : }
1410 : : EXPORT_SYMBOL(__breadahead);
1411 : :
1412 : : /**
1413 : : * __bread() - reads a specified block and returns the bh
1414 : : * @bdev: the block_device to read from
1415 : : * @block: number of block
1416 : : * @size: size (in bytes) to read
1417 : : *
1418 : : * Reads a specified block, and returns buffer head that contains it.
1419 : : * It returns NULL if the block was unreadable.
1420 : : */
1421 : : struct buffer_head *
1422 : 0 : __bread(struct block_device *bdev, sector_t block, unsigned size)
1423 : : {
1424 : 120 : struct buffer_head *bh = __getblk(bdev, block, size);
1425 : :
1426 [ + - ][ + + ]: 120 : if (likely(bh) && !buffer_uptodate(bh))
1427 : 88 : bh = __bread_slow(bh);
1428 : 0 : return bh;
1429 : : }
1430 : : EXPORT_SYMBOL(__bread);
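
A hedged caller-side sketch of the read-modify-write cycle built on these lookup helpers; sb_bread() (from buffer_head.h) just calls __bread() with sb->s_bdev and sb->s_blocksize, and example_update_block is an illustrative name:

static int example_update_block(struct super_block *sb, sector_t block,
				unsigned int offset, u8 value)
{
	struct buffer_head *bh = sb_bread(sb, block);

	if (!bh)
		return -EIO;			/* block was unreadable */
	((u8 *)bh->b_data)[offset] = value;	/* modify the cached copy */
	mark_buffer_dirty(bh);			/* schedule it for writeback */
	brelse(bh);				/* drop our reference */
	return 0;
}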
1431 : :
1432 : : /*
1433 : : * invalidate_bh_lrus() is called rarely - but not only at unmount.
1434 : : * This doesn't race because it runs in each cpu either in irq
1435 : : * or with preempt disabled.
1436 : : */
1437 : 0 : static void invalidate_bh_lru(void *arg)
1438 : : {
1439 : 181 : struct bh_lru *b = &get_cpu_var(bh_lrus);
1440 : : int i;
1441 : :
1442 [ + + ]: 1803 : for (i = 0; i < BH_LRU_SIZE; i++) {
1443 : 1442 : brelse(b->bhs[i]);
1444 : 1441 : b->bhs[i] = NULL;
1445 : : }
1446 : 180 : put_cpu_var(bh_lrus);
1447 : 180 : }
1448 : :
1449 : 0 : static bool has_bh_in_lru(int cpu, void *dummy)
1450 : : {
1451 : 290 : struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1452 : : int i;
1453 : :
1454 [ + + ]: 1146 : for (i = 0; i < BH_LRU_SIZE; i++) {
1455 [ + + ]: 1039 : if (b->bhs[i])
1456 : : return 1;
1457 : : }
1458 : :
1459 : : return 0;
1460 : : }
1461 : :
1462 : 0 : void invalidate_bh_lrus(void)
1463 : : {
1464 : 58 : on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1465 : 58 : }
1466 : : EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1467 : :
1468 : 0 : void set_bh_page(struct buffer_head *bh,
1469 : : struct page *page, unsigned long offset)
1470 : : {
1471 : 1881700 : bh->b_page = page;
1472 [ - + ]: 1881700 : BUG_ON(offset >= PAGE_SIZE);
1473 [ + + ]: 1881700 : if (PageHighMem(page))
1474 : : /*
1475 : : * This catches illegal uses and preserves the offset:
1476 : : */
1477 : 1215561 : bh->b_data = (char *)(0 + offset);
1478 : : else
1479 : 666139 : bh->b_data = page_address(page) + offset;
1480 : 1881735 : }
1481 : : EXPORT_SYMBOL(set_bh_page);
1482 : :
1483 : : /*
1484 : : * Called when truncating a buffer on a page completely.
1485 : : */
1486 : 0 : static void discard_buffer(struct buffer_head * bh)
1487 : : {
1488 : : lock_buffer(bh);
1489 : : clear_buffer_dirty(bh);
1490 : 1716552 : bh->b_bdev = NULL;
1491 : : clear_buffer_mapped(bh);
1492 : : clear_buffer_req(bh);
1493 : : clear_buffer_new(bh);
1494 : : clear_buffer_delay(bh);
1495 : : clear_buffer_unwritten(bh);
1496 : 1716544 : unlock_buffer(bh);
1497 : 1716435 : }
1498 : :
1499 : : /**
1500 : : * block_invalidatepage - invalidate part or all of a buffer-backed page
1501 : : *
1502 : : * @page: the page which is affected
1503 : : * @offset: start of the range to invalidate
1504 : : * @length: length of the range to invalidate
1505 : : *
1506 : : * block_invalidatepage() is called when all or part of the page has become
1507 : : * invalidated by a truncate operation.
1508 : : *
1509 : : * block_invalidatepage() does not have to release all buffers, but it must
1510 : : * ensure that no dirty buffer is left outside @offset and that no I/O
1511 : : * is underway against any of the blocks which are outside the truncation
1512 : : * point. Because the caller is about to free (and possibly reuse) those
1513 : : * blocks on-disk.
1514 : : */
1515 : 0 : void block_invalidatepage(struct page *page, unsigned int offset,
1516 : : unsigned int length)
1517 : : {
1518 : : struct buffer_head *head, *bh, *next;
1519 : : unsigned int curr_off = 0;
1520 : 1730770 : unsigned int stop = length + offset;
1521 : :
1522 [ - + ]: 1730770 : BUG_ON(!PageLocked(page));
1523 [ + ]: 1730770 : if (!page_has_buffers(page))
1524 : : goto out;
1525 : :
1526 : : /*
1527 : : * Check for overflow
1528 : : */
1529 [ - + ]: 3461540 : BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
1530 : :
1531 [ - + ]: 1730770 : head = page_buffers(page);
1532 : : bh = head;
1533 : : do {
1534 : 1732045 : unsigned int next_off = curr_off + bh->b_size;
1535 : 1732045 : next = bh->b_this_page;
1536 : :
1537 : : /*
1538 : : * Are we still fully in range ?
1539 : : */
1540 [ + ]: 1732045 : if (next_off > stop)
1541 : : goto out;
1542 : :
1543 : : /*
1544 : : * is this block fully invalidated?
1545 : : */
1546 [ + + ]: 1732065 : if (offset <= curr_off)
1547 : 1716592 : discard_buffer(bh);
1548 : : curr_off = next_off;
1549 : : bh = next;
1550 [ + + ]: 1731951 : } while (bh != head);
1551 : :
1552 : : /*
1553 : : * We release buffers only if the entire page is being invalidated.
1554 : : * The get_block cached value has been unconditionally invalidated,
1555 : : * so real IO is not possible anymore.
1556 : : */
1557 [ + + ]: 1730676 : if (offset == 0)
1558 : 1715226 : try_to_release_page(page, 0);
1559 : : out:
1560 : 0 : return;
1561 : : }
1562 : : EXPORT_SYMBOL(block_invalidatepage);
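
As a worked example (assuming 4096-byte pages and 512-byte blocks): a truncate that invalidates the tail of a page with @offset = 1536 and @length = 2560 leaves the first three buffers (page offsets 0-1535) untouched, discards the five buffers from offset 1536 onwards, and, because @offset is non-zero, does not call try_to_release_page().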
1563 : :
1564 : :
1565 : : /*
1566 : : * We attach and possibly dirty the buffers atomically wrt
1567 : : * __set_page_dirty_buffers() via private_lock. try_to_free_buffers
1568 : : * is already excluded via the page lock.
1569 : : */
1570 : 0 : void create_empty_buffers(struct page *page,
1571 : : unsigned long blocksize, unsigned long b_state)
1572 : : {
1573 : : struct buffer_head *bh, *head, *tail;
1574 : :
1575 : 1772285 : head = alloc_page_buffers(page, blocksize, 1);
1576 : : bh = head;
1577 : : do {
1578 : 1775207 : bh->b_state |= b_state;
1579 : : tail = bh;
1580 : 1775207 : bh = bh->b_this_page;
1581 [ + + ]: 1775207 : } while (bh);
1582 : 1774082 : tail->b_this_page = head;
1583 : :
1584 : 1774082 : spin_lock(&page->mapping->private_lock);
1585 [ + + ][ + + ]: 1770641 : if (PageUptodate(page) || PageDirty(page)) {
1586 : : bh = head;
1587 : : do {
1588 [ - + ]: 132029 : if (PageDirty(page))
1589 : : set_buffer_dirty(bh);
1590 [ + ]: 131969 : if (PageUptodate(page))
1591 : : set_buffer_uptodate(bh);
1592 : 132038 : bh = bh->b_this_page;
1593 [ + + ]: 132038 : } while (bh != head);
1594 : : }
1595 : : attach_page_buffers(page, head);
1596 : 1772246 : spin_unlock(&page->mapping->private_lock);
1597 : 1774269 : }
1598 : : EXPORT_SYMBOL(create_empty_buffers);
1599 : :
1600 : : /*
1601 : :  * We are taking a block for data and we don't want any output from any
1602 : :  * buffer-cache aliases from the moment this function returns until
1603 : :  * something explicitly marks the buffer dirty (hopefully that will not
1604 : :  * happen until we free that block ;-)
1605 : :  * We don't even need to mark it not-uptodate - nobody can expect
1606 : :  * anything from a newly allocated buffer anyway. We used to use
1607 : : * unmap_buffer() for such invalidation, but that was wrong. We definitely
1608 : : * don't want to mark the alias unmapped, for example - it would confuse
1609 : : * anyone who might pick it with bread() afterwards...
1610 : : *
1611 : : * Also.. Note that bforget() doesn't lock the buffer. So there can
1612 : : * be writeout I/O going on against recently-freed buffers. We don't
1613 : : * wait on that I/O in bforget() - it's more efficient to wait on the I/O
1614 : : * only if we really need to. That happens here.
1615 : : */
1616 : 0 : void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1617 : : {
1618 : : struct buffer_head *old_bh;
1619 : :
1620 : : might_sleep();
1621 : :
1622 : 2513356 : old_bh = __find_get_block_slow(bdev, block);
1623 [ + + ]: 2513619 : if (old_bh) {
1624 : : clear_buffer_dirty(old_bh);
1625 : : wait_on_buffer(old_bh);
1626 : : clear_buffer_req(old_bh);
1627 : 1180 : __brelse(old_bh);
1628 : : }
1629 : 2513619 : }
1630 : : EXPORT_SYMBOL(unmap_underlying_metadata);
1631 : :
1632 : : /*
1633 : : * Size is a power-of-two in the range 512..PAGE_SIZE,
1634 : : * and the case we care about most is PAGE_SIZE.
1635 : : *
1636 : : * So this *could* possibly be written with those
1637 : : * constraints in mind (relevant mostly if some
1638 : : * architecture has a slow bit-scan instruction)
1639 : : */
1640 : : static inline int block_size_bits(unsigned int blocksize)
1641 : : {
1642 [ - + ][ # # ]: 13006657 : return ilog2(blocksize);
[ branch data for the ilog2() expansion above continues for ~96 lines; nearly all branches are unexecuted (# #) and are omitted here ]
1643 : : }
1644 : :
1645 : 0 : static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1646 : : {
1647 [ - + ]: 6502933 : BUG_ON(!PageLocked(page));
1648 : :
1649 [ + + ]: 6502933 : if (!page_has_buffers(page))
1650 : 1767865 : create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
1651 [ - + ]: 6506356 : return page_buffers(page);
1652 : : }
1653 : :
1654 : : /*
1655 : : * NOTE! All mapped/uptodate combinations are valid:
1656 : : *
1657 : : * Mapped Uptodate Meaning
1658 : : *
1659 : : * No No "unknown" - must do get_block()
1660 : : * No Yes "hole" - zero-filled
1661 : : * Yes No "allocated" - allocated on disk, not read in
1662 : : * Yes Yes "valid" - allocated and up-to-date in memory.
1663 : : *
1664 : : * "Dirty" is valid only with the last case (mapped+uptodate).
1665 : : */
1666 : :
1667 : : /*
1668 : : * While block_write_full_page is writing back the dirty buffers under
1669 : : * the page lock, whoever dirtied the buffers may decide to clean them
1670 : : * again at any time. We handle that by only looking at the buffer
1671 : : * state inside lock_buffer().
1672 : : *
1673 : : * If block_write_full_page() is called for regular writeback
1674 : : * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1675 : : * locked buffer. This only can happen if someone has written the buffer
1676 : : * directly, with submit_bh(). At the address_space level PageWriteback
1677 : : * prevents this contention from occurring.
1678 : : *
1679 : : * If block_write_full_page() is called with wbc->sync_mode ==
1680 : : * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
1681 : : * causes the writes to be flagged as synchronous writes.
1682 : : */
1683 : 0 : static int __block_write_full_page(struct inode *inode, struct page *page,
1684 : : get_block_t *get_block, struct writeback_control *wbc,
1685 : : bh_end_io_t *handler)
1686 : : {
1687 : : int err;
1688 : : sector_t block;
1689 : : sector_t last_block;
1690 : : struct buffer_head *bh, *head;
1691 : : unsigned int blocksize, bbits;
1692 : : int nr_underway = 0;
1693 [ + + ]: 23984 : int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1694 : : WRITE_SYNC : WRITE);
1695 : :
1696 : 23984 : head = create_page_buffers(page, inode,
1697 : : (1 << BH_Dirty)|(1 << BH_Uptodate));
1698 : :
1699 : : /*
1700 : : * Be very careful. We have no exclusion from __set_page_dirty_buffers
1701 : : * here, and the (potentially unmapped) buffers may become dirty at
1702 : : * any time. If a buffer becomes dirty here after we've inspected it
1703 : : * then we just miss that fact, and the page stays dirty.
1704 : : *
1705 : : * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1706 : : * handle that here by just cleaning them.
1707 : : */
1708 : :
1709 : : bh = head;
1710 : 47968 : blocksize = bh->b_size;
1711 : 23984 : bbits = block_size_bits(blocksize);
1712 : :
1713 : 23984 : block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1714 : 23984 : last_block = (i_size_read(inode) - 1) >> bbits;
1715 : :
1716 : : /*
1717 : : * Get all the dirty buffers mapped to disk addresses and
1718 : : * handle any aliases from the underlying blockdev's mapping.
1719 : : */
1720 : : do {
1721 [ + + ]: 24230 : if (block > last_block) {
1722 : : /*
1723 : : * mapped buffers outside i_size will occur, because
1724 : : * this page can be outside i_size when there is a
1725 : : * truncate in progress.
1726 : : */
1727 : : /*
1728 : : * The buffer was zeroed by block_write_full_page()
1729 : : */
1730 : : clear_buffer_dirty(bh);
1731 : : set_buffer_uptodate(bh);
1732 [ + - ][ - + ]: 24227 : } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
[ # # ]
1733 : : buffer_dirty(bh)) {
1734 [ # # ]: 0 : WARN_ON(bh->b_size != blocksize);
1735 : 0 : err = get_block(inode, block, bh, 1);
1736 [ # # ]: 0 : if (err)
1737 : : goto recover;
1738 : : clear_buffer_delay(bh);
1739 [ # # ]: 0 : if (buffer_new(bh)) {
1740 : : /* blockdev mappings never come here */
1741 : : clear_buffer_new(bh);
1742 : 0 : unmap_underlying_metadata(bh->b_bdev,
1743 : : bh->b_blocknr);
1744 : : }
1745 : : }
1746 : 24230 : bh = bh->b_this_page;
1747 : 24230 : block++;
1748 [ + + ]: 24230 : } while (bh != head);
1749 : :
1750 : : do {
1751 [ + + ]: 24230 : if (!buffer_mapped(bh))
1752 : 3 : continue;
1753 : : /*
1754 : : * If it's a fully non-blocking write attempt and we cannot
1755 : : * lock the buffer then redirty the page. Note that this can
1756 : : * potentially cause a busy-wait loop from writeback threads
1757 : : * and kswapd activity, but those code paths have their own
1758 : : * higher-level throttling.
1759 : : */
1760 [ + + ]: 24227 : if (wbc->sync_mode != WB_SYNC_NONE) {
1761 : : lock_buffer(bh);
1762 [ + + ]: 15261 : } else if (!trylock_buffer(bh)) {
1763 : 23 : redirty_page_for_writepage(wbc, page);
1764 : 23 : continue;
1765 : : }
1766 [ + + ]: 24204 : if (test_clear_buffer_dirty(bh)) {
1767 : : mark_buffer_async_write_endio(bh, handler);
1768 : : } else {
1769 : 6133 : unlock_buffer(bh);
1770 : : }
1771 [ + + ]: 24230 : } while ((bh = bh->b_this_page) != head);
1772 : :
1773 : : /*
1774 : : * The page and its buffers are protected by PageWriteback(), so we can
1775 : : * drop the bh refcounts early.
1776 : : */
1777 [ - + ]: 23984 : BUG_ON(PageWriteback(page));
1778 : : set_page_writeback(page);
1779 : :
1780 : : do {
1781 : 24230 : struct buffer_head *next = bh->b_this_page;
1782 [ + + ]: 24230 : if (buffer_async_write(bh)) {
1783 : : submit_bh(write_op, bh);
1784 : 18070 : nr_underway++;
1785 : : }
1786 : : bh = next;
1787 [ + + ]: 24229 : } while (bh != head);
1788 : 23983 : unlock_page(page);
1789 : :
1790 : : err = 0;
1791 : : done:
1792 [ + + ]: 23984 : if (nr_underway == 0) {
1793 : : /*
1794 : : * The page was marked dirty, but the buffers were
1795 : : * clean. Someone wrote them back by hand with
1796 : : * ll_rw_block/submit_bh. A rare case.
1797 : : */
1798 : 5940 : end_page_writeback(page);
1799 : :
1800 : : /*
1801 : : * The page and buffer_heads can be released at any time from
1802 : : * here on.
1803 : : */
1804 : : }
1805 : 23984 : return err;
1806 : :
1807 : : recover:
1808 : : /*
1809 : : * ENOSPC, or some other error. We may already have added some
1810 : : * blocks to the file, so we need to write these out to avoid
1811 : : * exposing stale data.
1812 : : * The page is currently locked and not marked for writeback
1813 : : */
1814 : : bh = head;
1815 : : /* Recovery: lock and submit the mapped buffers */
1816 : : do {
1817 [ # # ][ # # ]: 0 : if (buffer_mapped(bh) && buffer_dirty(bh) &&
[ # # ]
1818 : : !buffer_delay(bh)) {
1819 : : lock_buffer(bh);
1820 : : mark_buffer_async_write_endio(bh, handler);
1821 : : } else {
1822 : : /*
1823 : : * The buffer may have been set dirty during
1824 : : * attachment to a dirty page.
1825 : : */
1826 : : clear_buffer_dirty(bh);
1827 : : }
1828 [ # # ]: 0 : } while ((bh = bh->b_this_page) != head);
1829 : : SetPageError(page);
1830 [ # # ]: 0 : BUG_ON(PageWriteback(page));
1831 : 0 : mapping_set_error(page->mapping, err);
1832 : : set_page_writeback(page);
1833 : : do {
1834 : 0 : struct buffer_head *next = bh->b_this_page;
1835 [ # # ]: 0 : if (buffer_async_write(bh)) {
1836 : : clear_buffer_dirty(bh);
1837 : : submit_bh(write_op, bh);
1838 : 0 : nr_underway++;
1839 : : }
1840 : : bh = next;
1841 [ # # ]: 0 : } while (bh != head);
1842 : 0 : unlock_page(page);
1843 : 0 : goto done;
1844 : : }
1845 : :
1846 : : /*
1847 : : * If a page has any new buffers, zero them out here, and mark them uptodate
1848 : : * and dirty so they'll be written out (in order to prevent uninitialised
1849 : :  * block data from leaking), and clear the new bit.
1850 : : */
1851 : 0 : void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1852 : : {
1853 : : unsigned int block_start, block_end;
1854 : : struct buffer_head *head, *bh;
1855 : :
1856 [ - + ]: 8 : BUG_ON(!PageLocked(page));
1857 [ + - ]: 8 : if (!page_has_buffers(page))
1858 : 0 : return;
1859 : :
1860 [ - + ]: 8 : bh = head = page_buffers(page);
1861 : : block_start = 0;
1862 : : do {
1863 : 8 : block_end = block_start + bh->b_size;
1864 : :
1865 [ + + ]: 8 : if (buffer_new(bh)) {
1866 [ + - ]: 2 : if (block_end > from && block_start < to) {
1867 [ + - ]: 2 : if (!PageUptodate(page)) {
1868 : : unsigned start, size;
1869 : :
1870 : 2 : start = max(from, block_start);
1871 : 2 : size = min(to, block_end) - start;
1872 : :
1873 : : zero_user(page, start, size);
1874 : : set_buffer_uptodate(bh);
1875 : : }
1876 : :
1877 : : clear_buffer_new(bh);
1878 : 2 : mark_buffer_dirty(bh);
1879 : : }
1880 : : }
1881 : :
1882 : : block_start = block_end;
1883 : 8 : bh = bh->b_this_page;
1884 [ - + ]: 8 : } while (bh != head);
1885 : : }
1886 : : EXPORT_SYMBOL(page_zero_new_buffers);
1887 : :
1888 : 0 : int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1889 : : get_block_t *get_block)
1890 : : {
1891 : 6477114 : unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1892 : 6477114 : unsigned to = from + len;
1893 : 6477114 : struct inode *inode = page->mapping->host;
1894 : : unsigned block_start, block_end;
1895 : : sector_t block;
1896 : : int err = 0;
1897 : : unsigned blocksize, bbits;
1898 : : struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1899 : :
1900 [ - + ]: 6477114 : BUG_ON(!PageLocked(page));
1901 : : BUG_ON(from > PAGE_CACHE_SIZE);
1902 [ - + ]: 6477114 : BUG_ON(to > PAGE_CACHE_SIZE);
1903 [ - + ]: 6477114 : BUG_ON(from > to);
1904 : :
1905 : 6477114 : head = create_page_buffers(page, inode, 0);
1906 : 12953979 : blocksize = head->b_size;
1907 : 6476865 : bbits = block_size_bits(blocksize);
1908 : :
1909 : 6476865 : block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1910 : :
1911 [ + + ]: 12952086 : for(bh = head, block_start = 0; bh != head || !block_start;
1912 : 6475221 : block++, block_start=block_end, bh = bh->b_this_page) {
1913 : 6477399 : block_end = block_start + blocksize;
1914 [ + + ]: 6477399 : if (block_end <= from || block_start >= to) {
1915 [ + ]: 0 : if (PageUptodate(page)) {
1916 [ - + ]: 33 : if (!buffer_uptodate(bh))
1917 : : set_buffer_uptodate(bh);
1918 : : }
1919 : 0 : continue;
1920 : : }
1921 [ - + ]: 6477068 : if (buffer_new(bh))
1922 : : clear_buffer_new(bh);
1923 [ + + ]: 6480746 : if (!buffer_mapped(bh)) {
1924 [ - + ]: 1771121 : WARN_ON(bh->b_size != blocksize);
1925 : 1771121 : err = get_block(inode, block, bh, 1);
1926 [ + ]: 8243572 : if (err)
1927 : : break;
1928 [ + + ]: 1768902 : if (buffer_new(bh)) {
1929 : 1766665 : unmap_underlying_metadata(bh->b_bdev,
1930 : : bh->b_blocknr);
1931 [ + + ]: 1766388 : if (PageUptodate(page)) {
1932 : 132648 : clear_buffer_new(bh);
1933 : 132665 : set_buffer_uptodate(bh);
1934 : 132676 : mark_buffer_dirty(bh);
1935 : 132716 : continue;
1936 : : }
1937 [ + + ]: 1633740 : if (block_end > to || block_start < from)
1938 : : zero_user_segments(page,
1939 : : to, block_end,
1940 : : block_start, from);
1941 : 1631851 : continue;
1942 : : }
1943 : : }
1944 [ + + ]: 4709268 : if (PageUptodate(page)) {
1945 [ - + ]: 4705553 : if (!buffer_uptodate(bh))
1946 : : set_buffer_uptodate(bh);
1947 : 4707427 : continue;
1948 : : }
1949 [ + - ][ + - ]: 3715 : if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
[ + - ]
1950 [ + + ]: 3715 : !buffer_unwritten(bh) &&
1951 : 3715 : (block_start < from || block_end > to)) {
1952 : 540 : ll_rw_block(READ, 1, &bh);
1953 : 540 : *wait_bh++=bh;
1954 : : }
1955 : : }
1956 : : /*
1957 : : * If we issued read requests - let them complete.
1958 : : */
1959 [ + + ]: 6475210 : while(wait_bh > wait) {
1960 : 540 : wait_on_buffer(*--wait_bh);
1961 [ - + ]: 540 : if (!buffer_uptodate(*wait_bh))
1962 : : err = -EIO;
1963 : : }
1964 [ - + ]: 6474670 : if (unlikely(err))
1965 : 0 : page_zero_new_buffers(page, from, to);
1966 : 6474670 : return err;
1967 : : }
1968 : : EXPORT_SYMBOL(__block_write_begin);
1969 : :
1970 : 0 : static int __block_commit_write(struct inode *inode, struct page *page,
1971 : : unsigned from, unsigned to)
1972 : : {
1973 : : unsigned block_start, block_end;
1974 : : int partial = 0;
1975 : : unsigned blocksize;
1976 : : struct buffer_head *bh, *head;
1977 : :
1978 [ - + ]: 6477211 : bh = head = page_buffers(page);
1979 : 6477211 : blocksize = bh->b_size;
1980 : :
1981 : : block_start = 0;
1982 : : do {
1983 : 6476570 : block_end = block_start + blocksize;
1984 [ + + ]: 6476570 : if (block_end <= from || block_start >= to) {
1985 [ + + ]: 65 : if (!buffer_uptodate(bh))
1986 : : partial = 1;
1987 : : } else {
1988 : : set_buffer_uptodate(bh);
1989 : 6475941 : mark_buffer_dirty(bh);
1990 : : }
1991 : : clear_buffer_new(bh);
1992 : :
1993 : : block_start = block_end;
1994 : 6476716 : bh = bh->b_this_page;
1995 [ + ]: 6476716 : } while (bh != head);
1996 : :
1997 : : /*
1998 : : * If this is a partial write which happened to make all buffers
1999 : : * uptodate then we can optimize away a bogus readpage() for
2000 : : * the next read(). Here we 'discover' whether the page went
2001 : : * uptodate as a result of this (potentially partial) write.
2002 : : */
2003 [ + + ]: 6477357 : if (!partial)
2004 : : SetPageUptodate(page);
2005 : 6475672 : return 0;
2006 : : }
2007 : :
2008 : : /*
2009 : : * block_write_begin takes care of the basic task of block allocation and
2010 : : * bringing partial write blocks uptodate first.
2011 : : *
2012 : : * The filesystem needs to handle block truncation upon failure.
2013 : : */
2014 : 0 : int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2015 : : unsigned flags, struct page **pagep, get_block_t *get_block)
2016 : : {
2017 : 5271 : pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2018 : : struct page *page;
2019 : : int status;
2020 : :
2021 : 5271 : page = grab_cache_page_write_begin(mapping, index, flags);
2022 [ + - ]: 5271 : if (!page)
2023 : : return -ENOMEM;
2024 : :
2025 : 5271 : status = __block_write_begin(page, pos, len, get_block);
2026 [ - + ]: 5271 : if (unlikely(status)) {
2027 : 0 : unlock_page(page);
2028 : 0 : page_cache_release(page);
2029 : : page = NULL;
2030 : : }
2031 : :
2032 : 5271 : *pagep = page;
2033 : 5271 : return status;
2034 : : }
2035 : : EXPORT_SYMBOL(block_write_begin);
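A hedged sketch of how a filesystem might wrap this in its ->write_begin, assuming the hypothetical myfs_get_block() block mapper; as the comment above notes, the error path must also truncate any blocks instantiated beyond i_size:

#include <linux/fs.h>
#include <linux/buffer_head.h>

static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create);

static int myfs_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
{
	int ret;

	ret = block_write_begin(mapping, pos, len, flags, pagep,
				myfs_get_block);
	if (ret < 0) {
		/* filesystem-specific: truncate blocks allocated past i_size */
	}
	return ret;
}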
2036 : :
2037 : 0 : int block_write_end(struct file *file, struct address_space *mapping,
2038 : : loff_t pos, unsigned len, unsigned copied,
2039 : : struct page *page, void *fsdata)
2040 : : {
2041 : : struct inode *inode = mapping->host;
2042 : : unsigned start;
2043 : :
2044 : 6270043 : start = pos & (PAGE_CACHE_SIZE - 1);
2045 : :
2046 [ + + ]: 6270043 : if (unlikely(copied < len)) {
2047 : : /*
2048 : : * The buffers that were written will now be uptodate, so we
2049 : : * don't have to worry about a readpage reading them and
2050 : : * overwriting a partial write. However if we have encountered
2051 : : * a short write and only partially written into a buffer, it
2052 : : * will not be marked uptodate, so a readpage might come in and
2053 : : * destroy our partial write.
2054 : : *
2055 : : * Do the simplest thing, and just treat any short write to a
2056 : : * non uptodate page as a zero-length write, and force the
2057 : : * caller to redo the whole thing.
2058 : : */
2059 [ + + ]: 6270051 : if (!PageUptodate(page))
2060 : : copied = 0;
2061 : :
2062 : 8 : page_zero_new_buffers(page, start+copied, start+len);
2063 : : }
2064 : 6270043 : flush_dcache_page(page);
2065 : :
2066 : : /* This could be a short (even 0-length) commit */
2067 : 6272154 : __block_commit_write(inode, page, start, start+copied);
2068 : :
2069 : 6269739 : return copied;
2070 : : }
2071 : : EXPORT_SYMBOL(block_write_end);
2072 : :
2073 : 0 : int generic_write_end(struct file *file, struct address_space *mapping,
2074 : : loff_t pos, unsigned len, unsigned copied,
2075 : : struct page *page, void *fsdata)
2076 : : {
2077 : 6265384 : struct inode *inode = mapping->host;
2078 : : int i_size_changed = 0;
2079 : :
2080 : 6265384 : copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2081 : :
2082 : : /*
2083 : : * No need to use i_size_read() here, the i_size
2084 : : * cannot change under us because we hold i_mutex.
2085 : : *
2086 : : * But it's important to update i_size while still holding page lock:
2087 : : * page writeout could otherwise come in and zero beyond i_size.
2088 : : */
2089 [ + + ]: 6264242 : if (pos+copied > inode->i_size) {
2090 : : i_size_write(inode, pos+copied);
2091 : : i_size_changed = 1;
2092 : : }
2093 : :
2094 : 6260560 : unlock_page(page);
2095 : 6263288 : page_cache_release(page);
2096 : :
2097 : : /*
2098 : : * Don't mark the inode dirty under page lock. First, it unnecessarily
2099 : : * makes the holding time of page lock longer. Second, it forces lock
2100 : : * ordering of page lock and transaction start for journaling
2101 : : * filesystems.
2102 : : */
2103 [ + + ]: 6258187 : if (i_size_changed)
2104 : : mark_inode_dirty(inode);
2105 : :
2106 : 6258445 : return copied;
2107 : : }
2108 : : EXPORT_SYMBOL(generic_write_end);
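generic_write_end() has the same prototype as ->write_end, so it pairs directly with a block_write_begin()-based ->write_begin such as the sketch above; hypothetical wiring:

static const struct address_space_operations myfs_aops = {
	.write_begin	= myfs_write_begin,	/* sketch above */
	.write_end	= generic_write_end,
	/* ... */
};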
2109 : :
2110 : : /*
2111 : : * block_is_partially_uptodate checks whether buffers within a page are
2112 : : * uptodate or not.
2113 : : *
2114 : : * Returns true if all buffers which correspond to a file portion
2115 : : * we want to read are uptodate.
2116 : : */
2117 : 0 : int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
2118 : : unsigned long from)
2119 : : {
2120 : : unsigned block_start, block_end, blocksize;
2121 : : unsigned to;
2122 : : struct buffer_head *bh, *head;
2123 : : int ret = 1;
2124 : :
2125 [ + - ]: 4 : if (!page_has_buffers(page))
2126 : : return 0;
2127 : :
2128 [ - + ]: 4 : head = page_buffers(page);
2129 : 4 : blocksize = head->b_size;
2130 : 4 : to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
2131 : 4 : to = from + to;
2132 [ + - ][ + ]: 4 : if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2133 : : return 0;
2134 : :
2135 : : bh = head;
2136 : : block_start = 0;
2137 : : do {
2138 : 3 : block_end = block_start + blocksize;
2139 [ + - ]: 3 : if (block_end > from && block_start < to) {
2140 [ + - ]: 3 : if (!buffer_uptodate(bh)) {
2141 : : ret = 0;
2142 : : break;
2143 : : }
2144 [ - + ]: 3 : if (block_end >= to)
2145 : : break;
2146 : : }
2147 : : block_start = block_end;
2148 : 0 : bh = bh->b_this_page;
2149 [ # # ]: 0 : } while (bh != head);
2150 : :
2151 : 3 : return ret;
2152 : : }
2153 : : EXPORT_SYMBOL(block_is_partially_uptodate);
2154 : :
2155 : : /*
2156 : : * Generic "read page" function for block devices that have the normal
2157 : : * get_block functionality. This is most of the block device filesystems.
2158 : : * Reads the page asynchronously --- the unlock_buffer() and
2159 : : * set/clear_buffer_uptodate() functions propagate buffer state into the
2160 : : * page struct once IO has completed.
2161 : : */
2162 : 0 : int block_read_full_page(struct page *page, get_block_t *get_block)
2163 : : {
2164 : 2355 : struct inode *inode = page->mapping->host;
2165 : : sector_t iblock, lblock;
2166 : : struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2167 : : unsigned int blocksize, bbits;
2168 : : int nr, i;
2169 : : int fully_mapped = 1;
2170 : :
2171 : 2355 : head = create_page_buffers(page, inode, 0);
2172 : 4710 : blocksize = head->b_size;
2173 : 2355 : bbits = block_size_bits(blocksize);
2174 : :
2175 : 2355 : iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
2176 : 2355 : lblock = (i_size_read(inode)+blocksize-1) >> bbits;
2177 : : bh = head;
2178 : : nr = 0;
2179 : : i = 0;
2180 : :
2181 : : do {
2182 [ + + ]: 3468 : if (buffer_uptodate(bh))
2183 : 29 : continue;
2184 : :
2185 [ + + ]: 3439 : if (!buffer_mapped(bh)) {
2186 : : int err = 0;
2187 : :
2188 : : fully_mapped = 0;
2189 [ + + ]: 3413 : if (iblock < lblock) {
2190 [ - + ]: 3410 : WARN_ON(bh->b_size != blocksize);
2191 : 3410 : err = get_block(inode, iblock, bh, 0);
2192 [ - + ]: 3410 : if (err)
2193 : : SetPageError(page);
2194 : : }
2195 [ + + ]: 3413 : if (!buffer_mapped(bh)) {
2196 : 1079 : zero_user(page, i * blocksize, blocksize);
2197 [ + - ]: 1079 : if (!err)
2198 : : set_buffer_uptodate(bh);
2199 : 1079 : continue;
2200 : : }
2201 : : /*
2202 : : * get_block() might have updated the buffer
2203 : : * synchronously
2204 : : */
2205 [ - + ]: 2334 : if (buffer_uptodate(bh))
2206 : 0 : continue;
2207 : : }
2208 : 2360 : arr[nr++] = bh;
2209 [ + + ]: 3468 : } while (i++, iblock++, (bh = bh->b_this_page) != head);
2210 : :
2211 [ + + ]: 2355 : if (fully_mapped)
2212 : : SetPageMappedToDisk(page);
2213 : :
2214 [ + + ]: 2355 : if (!nr) {
2215 : : /*
2216 : : * All buffers are uptodate - we can set the page uptodate
2217 : : * as well. But not if get_block() returned an error.
2218 : : */
2219 [ + - ]: 1079 : if (!PageError(page))
2220 : : SetPageUptodate(page);
2221 : 1079 : unlock_page(page);
2222 : 1079 : return 0;
2223 : : }
2224 : :
2225 : : /* Stage two: lock the buffers */
2226 [ + + ]: 3636 : for (i = 0; i < nr; i++) {
2227 : 2360 : bh = arr[i];
2228 : : lock_buffer(bh);
2229 : : mark_buffer_async_read(bh);
2230 : : }
2231 : :
2232 : : /*
2233 : : * Stage 3: start the IO. Check for uptodateness
2234 : : * inside the buffer lock in case another process reading
2235 : : * the underlying blockdev brought it uptodate (the sct fix).
2236 : : */
2237 [ + + ]: 3636 : for (i = 0; i < nr; i++) {
2238 : 2360 : bh = arr[i];
2239 [ - + ]: 2360 : if (buffer_uptodate(bh))
2240 : 0 : end_buffer_async_read(bh, 1);
2241 : : else
2242 : : submit_bh(READ, bh);
2243 : : }
2244 : : return 0;
2245 : : }
2246 : : EXPORT_SYMBOL(block_read_full_page);
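A ->readpage built on this helper is typically just a wrapper that supplies the filesystem's block mapper; a sketch using the hypothetical myfs_get_block() from earlier:

static int myfs_readpage(struct file *file, struct page *page)
{
	return block_read_full_page(page, myfs_get_block);
}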
2247 : :
2248 : : /* utility function for filesystems that need to do work on expanding
2249 : : * truncates. Uses filesystem pagecache writes to allow the filesystem to
2250 : : * deal with the hole.
2251 : : */
2252 : 0 : int generic_cont_expand_simple(struct inode *inode, loff_t size)
2253 : : {
2254 : 0 : struct address_space *mapping = inode->i_mapping;
2255 : : struct page *page;
2256 : : void *fsdata;
2257 : : int err;
2258 : :
2259 : 0 : err = inode_newsize_ok(inode, size);
2260 [ # # ]: 0 : if (err)
2261 : : goto out;
2262 : :
2263 : 0 : err = pagecache_write_begin(NULL, mapping, size, 0,
2264 : : AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2265 : : &page, &fsdata);
2266 [ # # ]: 0 : if (err)
2267 : : goto out;
2268 : :
2269 : 0 : err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2270 [ # # ]: 0 : BUG_ON(err > 0);
2271 : :
2272 : : out:
2273 : 0 : return err;
2274 : : }
2275 : : EXPORT_SYMBOL(generic_cont_expand_simple);
2276 : :
2277 : 0 : static int cont_expand_zero(struct file *file, struct address_space *mapping,
2278 : : loff_t pos, loff_t *bytes)
2279 : : {
2280 : 0 : struct inode *inode = mapping->host;
2281 : 0 : unsigned blocksize = 1 << inode->i_blkbits;
2282 : : struct page *page;
2283 : : void *fsdata;
2284 : : pgoff_t index, curidx;
2285 : : loff_t curpos;
2286 : : unsigned zerofrom, offset, len;
2287 : : int err = 0;
2288 : :
2289 : 0 : index = pos >> PAGE_CACHE_SHIFT;
2290 : 0 : offset = pos & ~PAGE_CACHE_MASK;
2291 : :
2292 [ # # ]: 0 : while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
2293 : 0 : zerofrom = curpos & ~PAGE_CACHE_MASK;
2294 [ # # ]: 0 : if (zerofrom & (blocksize-1)) {
2295 : 0 : *bytes |= (blocksize-1);
2296 : 0 : (*bytes)++;
2297 : : }
2298 : 0 : len = PAGE_CACHE_SIZE - zerofrom;
2299 : :
2300 : 0 : err = pagecache_write_begin(file, mapping, curpos, len,
2301 : : AOP_FLAG_UNINTERRUPTIBLE,
2302 : : &page, &fsdata);
2303 [ # # ]: 0 : if (err)
2304 : : goto out;
2305 : 0 : zero_user(page, zerofrom, len);
2306 : 0 : err = pagecache_write_end(file, mapping, curpos, len, len,
2307 : : page, fsdata);
2308 [ # # ]: 0 : if (err < 0)
2309 : : goto out;
2310 [ # # ]: 0 : BUG_ON(err != len);
2311 : : err = 0;
2312 : :
2313 : 0 : balance_dirty_pages_ratelimited(mapping);
2314 : : }
2315 : :
2316 : : /* page covers the boundary, find the boundary offset */
2317 [ # # ]: 0 : if (index == curidx) {
2318 : 0 : zerofrom = curpos & ~PAGE_CACHE_MASK;
2319 : : /* if we will expand the thing last block will be filled */
2320 [ # # ]: 0 : if (offset <= zerofrom) {
2321 : : goto out;
2322 : : }
2323 [ # # ]: 0 : if (zerofrom & (blocksize-1)) {
2324 : 0 : *bytes |= (blocksize-1);
2325 : 0 : (*bytes)++;
2326 : : }
2327 : 0 : len = offset - zerofrom;
2328 : :
2329 : 0 : err = pagecache_write_begin(file, mapping, curpos, len,
2330 : : AOP_FLAG_UNINTERRUPTIBLE,
2331 : : &page, &fsdata);
2332 [ # # ]: 0 : if (err)
2333 : : goto out;
2334 : 0 : zero_user(page, zerofrom, len);
2335 : 0 : err = pagecache_write_end(file, mapping, curpos, len, len,
2336 : : page, fsdata);
2337 [ # # ]: 0 : if (err < 0)
2338 : : goto out;
2339 [ # # ]: 0 : BUG_ON(err != len);
2340 : : err = 0;
2341 : : }
2342 : : out:
2343 : 0 : return err;
2344 : : }
2345 : :
2346 : : /*
2347 : :  * For moronic filesystems that do not allow holes in files.
2348 : : * We may have to extend the file.
2349 : : */
2350 : 0 : int cont_write_begin(struct file *file, struct address_space *mapping,
2351 : : loff_t pos, unsigned len, unsigned flags,
2352 : : struct page **pagep, void **fsdata,
2353 : : get_block_t *get_block, loff_t *bytes)
2354 : : {
2355 : 0 : struct inode *inode = mapping->host;
2356 : 0 : unsigned blocksize = 1 << inode->i_blkbits;
2357 : : unsigned zerofrom;
2358 : : int err;
2359 : :
2360 : 0 : err = cont_expand_zero(file, mapping, pos, bytes);
2361 [ # # ]: 0 : if (err)
2362 : : return err;
2363 : :
2364 : 0 : zerofrom = *bytes & ~PAGE_CACHE_MASK;
2365 [ # # ][ # # ]: 0 : if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2366 : 0 : *bytes |= (blocksize-1);
2367 : 0 : (*bytes)++;
2368 : : }
2369 : :
2370 : 0 : return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2371 : : }
2372 : : EXPORT_SYMBOL(cont_write_begin);
2373 : :
2374 : 0 : int block_commit_write(struct page *page, unsigned from, unsigned to)
2375 : : {
2376 : : struct inode *inode = page->mapping->host;
2377 : 205271 : __block_commit_write(inode,page,from,to);
2378 : 0 : return 0;
2379 : : }
2380 : : EXPORT_SYMBOL(block_commit_write);
2381 : :
2382 : : /*
2383 : : * block_page_mkwrite() is not allowed to change the file size as it gets
2384 : : * called from a page fault handler when a page is first dirtied. Hence we must
2385 : : * be careful to check for EOF conditions here. We set the page up correctly
2386 : : * for a written page which means we get ENOSPC checking when writing into
2387 : : * holes and correct delalloc and unwritten extent mapping on filesystems that
2388 : : * support these features.
2389 : : *
2390 : : * We are not allowed to take the i_mutex here so we have to play games to
2391 : : * protect against truncate races as the page could now be beyond EOF. Because
2392 : : * truncate writes the inode size before removing pages, once we have the
2393 : : * page lock we can determine safely if the page is beyond EOF. If it is not
2394 : : * beyond EOF, then the page is guaranteed safe against truncation until we
2395 : : * unlock the page.
2396 : : *
2397 : : * Direct callers of this function should protect against filesystem freezing
2398 : : * using sb_start_write() - sb_end_write() functions.
2399 : : */
2400 : 0 : int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2401 : : get_block_t get_block)
2402 : : {
2403 : 410636 : struct page *page = vmf->page;
2404 : 205086 : struct inode *inode = file_inode(vma->vm_file);
2405 : : unsigned long end;
2406 : : loff_t size;
2407 : : int ret;
2408 : :
2409 : : lock_page(page);
2410 : : size = i_size_read(inode);
2411 [ + ][ + ]: 205440 : if ((page->mapping != inode->i_mapping) ||
2412 : : (page_offset(page) > size)) {
2413 : : /* We overload EFAULT to mean page got truncated */
2414 : : ret = -EFAULT;
2415 : : goto out_unlock;
2416 : : }
2417 : :
2418 : : /* page is wholly or partially inside EOF */
2419 [ + + ]: 205555 : if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
2420 : 54478 : end = size & ~PAGE_CACHE_MASK;
2421 : : else
2422 : : end = PAGE_CACHE_SIZE;
2423 : :
2424 : 205555 : ret = __block_write_begin(page, 0, end, get_block);
2425 [ + ]: 205198 : if (!ret)
2426 : : ret = block_commit_write(page, 0, end);
2427 : :
2428 [ + - ]: 205147 : if (unlikely(ret < 0))
2429 : : goto out_unlock;
2430 : 205147 : set_page_dirty(page);
2431 : 205601 : wait_for_stable_page(page);
2432 : 205598 : return 0;
2433 : : out_unlock:
2434 : 0 : unlock_page(page);
2435 : 0 : return ret;
2436 : : }
2437 : : EXPORT_SYMBOL(__block_page_mkwrite);
2438 : :
2439 : 0 : int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2440 : : get_block_t get_block)
2441 : : {
2442 : : int ret;
2443 : 0 : struct super_block *sb = file_inode(vma->vm_file)->i_sb;
2444 : :
2445 : : sb_start_pagefault(sb);
2446 : :
2447 : : /*
2448 : : * Update file times before taking page lock. We may end up failing the
2449 : : * fault so this update may be superfluous but who really cares...
2450 : : */
2451 : 0 : file_update_time(vma->vm_file);
2452 : :
2453 : 0 : ret = __block_page_mkwrite(vma, vmf, get_block);
2454 : : sb_end_pagefault(sb);
2455 : 0 : return block_page_mkwrite_return(ret);
2456 : : }
2457 : : EXPORT_SYMBOL(block_page_mkwrite);
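A filesystem that needs no work beyond what block_page_mkwrite() already does can hook it into its vm_operations_struct with a thin wrapper; a sketch using the hypothetical myfs_get_block():

static int myfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	return block_page_mkwrite(vma, vmf, myfs_get_block);
}

static const struct vm_operations_struct myfs_vm_ops = {
	.fault		= filemap_fault,
	.page_mkwrite	= myfs_page_mkwrite,
};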
2458 : :
2459 : : /*
2460 : : * nobh_write_begin()'s prereads are special: the buffer_heads are freed
2461 : : * immediately, while under the page lock. So it needs a special end_io
2462 : : * handler which does not touch the bh after unlocking it.
2463 : : */
2464 : 0 : static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
2465 : : {
2466 : 0 : __end_buffer_read_notouch(bh, uptodate);
2467 : 0 : }
2468 : :
2469 : : /*
2470 : : * Attach the singly-linked list of buffers created by nobh_write_begin, to
2471 : : * the page (converting it to circular linked list and taking care of page
2472 : : * dirty races).
2473 : : */
2474 : 0 : static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2475 : : {
2476 : : struct buffer_head *bh;
2477 : :
2478 [ # # ]: 0 : BUG_ON(!PageLocked(page));
2479 : :
2480 : 0 : spin_lock(&page->mapping->private_lock);
2481 : : bh = head;
2482 : : do {
2483 [ # # ]: 0 : if (PageDirty(page))
2484 : : set_buffer_dirty(bh);
2485 [ # # ]: 0 : if (!bh->b_this_page)
2486 : 0 : bh->b_this_page = head;
2487 : 0 : bh = bh->b_this_page;
2488 [ # # ]: 0 : } while (bh != head);
2489 : : attach_page_buffers(page, head);
2490 : 0 : spin_unlock(&page->mapping->private_lock);
2491 : 0 : }
2492 : :
2493 : : /*
2494 : : * On entry, the page is fully not uptodate.
2495 : : * On exit the page is fully uptodate in the areas outside (from,to)
2496 : : * The filesystem needs to handle block truncation upon failure.
2497 : : */
2498 : 0 : int nobh_write_begin(struct address_space *mapping,
2499 : : loff_t pos, unsigned len, unsigned flags,
2500 : : struct page **pagep, void **fsdata,
2501 : : get_block_t *get_block)
2502 : : {
2503 : 0 : struct inode *inode = mapping->host;
2504 : 0 : const unsigned blkbits = inode->i_blkbits;
2505 : 0 : const unsigned blocksize = 1 << blkbits;
2506 : : struct buffer_head *head, *bh;
2507 : : struct page *page;
2508 : : pgoff_t index;
2509 : : unsigned from, to;
2510 : : unsigned block_in_page;
2511 : : unsigned block_start, block_end;
2512 : : sector_t block_in_file;
2513 : : int nr_reads = 0;
2514 : : int ret = 0;
2515 : : int is_mapped_to_disk = 1;
2516 : :
2517 : 0 : index = pos >> PAGE_CACHE_SHIFT;
2518 : 0 : from = pos & (PAGE_CACHE_SIZE - 1);
2519 : 0 : to = from + len;
2520 : :
2521 : 0 : page = grab_cache_page_write_begin(mapping, index, flags);
2522 [ # # ]: 0 : if (!page)
2523 : : return -ENOMEM;
2524 : 0 : *pagep = page;
2525 : 0 : *fsdata = NULL;
2526 : :
2527 [ # # ]: 0 : if (page_has_buffers(page)) {
2528 : 0 : ret = __block_write_begin(page, pos, len, get_block);
2529 [ # # ]: 0 : if (unlikely(ret))
2530 : : goto out_release;
2531 : : return ret;
2532 : : }
2533 : :
2534 [ # # ]: 0 : if (PageMappedToDisk(page))
2535 : : return 0;
2536 : :
2537 : : /*
2538 : : * Allocate buffers so that we can keep track of state, and potentially
2539 : : * attach them to the page if an error occurs. In the common case of
2540 : : * no error, they will just be freed again without ever being attached
2541 : : * to the page (which is all OK, because we're under the page lock).
2542 : : *
2543 : : * Be careful: the buffer linked list is a NULL terminated one, rather
2544 : : * than the circular one we're used to.
2545 : : */
2546 : 0 : head = alloc_page_buffers(page, blocksize, 0);
2547 [ # # ]: 0 : if (!head) {
2548 : : ret = -ENOMEM;
2549 : : goto out_release;
2550 : : }
2551 : :
2552 : 0 : block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
2553 : :
2554 : : /*
2555 : : * We loop across all blocks in the page, whether or not they are
2556 : : * part of the affected region. This is so we can discover if the
2557 : : * page is fully mapped-to-disk.
2558 : : */
2559 [ # # ]: 0 : for (block_start = 0, block_in_page = 0, bh = head;
2560 : : block_start < PAGE_CACHE_SIZE;
2561 : 0 : block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2562 : : int create;
2563 : :
2564 : 0 : block_end = block_start + blocksize;
2565 : 0 : bh->b_state = 0;
2566 : : create = 1;
2567 [ # # ]: 0 : if (block_start >= to)
2568 : : create = 0;
2569 : 0 : ret = get_block(inode, block_in_file + block_in_page,
2570 : : bh, create);
2571 [ # # ]: 0 : if (ret)
2572 : : goto failed;
2573 [ # # ]: 0 : if (!buffer_mapped(bh))
2574 : : is_mapped_to_disk = 0;
2575 [ # # ]: 0 : if (buffer_new(bh))
2576 : 0 : unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
2577 [ # # ]: 0 : if (PageUptodate(page)) {
2578 : : set_buffer_uptodate(bh);
2579 : 0 : continue;
2580 : : }
2581 [ # # ][ # # ]: 0 : if (buffer_new(bh) || !buffer_mapped(bh)) {
2582 : : zero_user_segments(page, block_start, from,
2583 : : to, block_end);
2584 : 0 : continue;
2585 : : }
2586 [ # # ]: 0 : if (buffer_uptodate(bh))
2587 : 0 : continue; /* reiserfs does this */
2588 [ # # ]: 0 : if (block_start < from || block_end > to) {
2589 : : lock_buffer(bh);
2590 : 0 : bh->b_end_io = end_buffer_read_nobh;
2591 : : submit_bh(READ, bh);
2592 : 0 : nr_reads++;
2593 : : }
2594 : : }
2595 : :
2596 [ # # ]: 0 : if (nr_reads) {
2597 : : /*
2598 : : * The page is locked, so these buffers are protected from
2599 : : * any VM or truncate activity. Hence we don't need to care
2600 : : * for the buffer_head refcounts.
2601 : : */
2602 [ # # ]: 0 : for (bh = head; bh; bh = bh->b_this_page) {
2603 : : wait_on_buffer(bh);
2604 [ # # ]: 0 : if (!buffer_uptodate(bh))
2605 : : ret = -EIO;
2606 : : }
2607 [ # # ]: 0 : if (ret)
2608 : : goto failed;
2609 : : }
2610 : :
2611 [ # # ]: 0 : if (is_mapped_to_disk)
2612 : : SetPageMappedToDisk(page);
2613 : :
2614 : 0 : *fsdata = head; /* to be released by nobh_write_end */
2615 : :
2616 : 0 : return 0;
2617 : :
2618 : : failed:
2619 [ # # ]: 0 : BUG_ON(!ret);
2620 : : /*
2621 : : * Error recovery is a bit difficult. We need to zero out blocks that
2622 : : * were newly allocated, and dirty them to ensure they get written out.
2623 : : * Buffers need to be attached to the page at this point, otherwise
2624 : : * the handling of potential IO errors during writeout would be hard
2625 : : * (could try doing synchronous writeout, but what if that fails too?)
2626 : : */
2627 : 0 : attach_nobh_buffers(page, head);
2628 : 0 : page_zero_new_buffers(page, from, to);
2629 : :
2630 : : out_release:
2631 : 0 : unlock_page(page);
2632 : 0 : page_cache_release(page);
2633 : 0 : *pagep = NULL;
2634 : :
2635 : 0 : return ret;
2636 : : }
2637 : : EXPORT_SYMBOL(nobh_write_begin);
2638 : :
2639 : 0 : int nobh_write_end(struct file *file, struct address_space *mapping,
2640 : : loff_t pos, unsigned len, unsigned copied,
2641 : : struct page *page, void *fsdata)
2642 : : {
2643 : 0 : struct inode *inode = page->mapping->host;
2644 : : struct buffer_head *head = fsdata;
2645 : : struct buffer_head *bh;
2646 [ # # ][ # # ]: 0 : BUG_ON(fsdata != NULL && page_has_buffers(page));
2647 : :
2648 [ # # ][ # # ]: 0 : if (unlikely(copied < len) && head)
2649 : 0 : attach_nobh_buffers(page, head);
2650 [ # # ]: 0 : if (page_has_buffers(page))
2651 : 0 : return generic_write_end(file, mapping, pos, len,
2652 : : copied, page, fsdata);
2653 : :
2654 : : SetPageUptodate(page);
2655 : 0 : set_page_dirty(page);
2656 [ # # ]: 0 : if (pos+copied > inode->i_size) {
2657 : : i_size_write(inode, pos+copied);
2658 : : mark_inode_dirty(inode);
2659 : : }
2660 : :
2661 : 0 : unlock_page(page);
2662 : 0 : page_cache_release(page);
2663 : :
2664 [ # # ]: 0 : while (head) {
2665 : : bh = head;
2666 : 0 : head = head->b_this_page;
2667 : 0 : free_buffer_head(bh);
2668 : : }
2669 : :
2670 : 0 : return copied;
2671 : : }
2672 : : EXPORT_SYMBOL(nobh_write_end);
2673 : :
2674 : : /*
2675 : :  * nobh_writepage() - based on block_write_full_page() except
2676 : : * that it tries to operate without attaching bufferheads to
2677 : : * the page.
2678 : : */
2679 : 0 : int nobh_writepage(struct page *page, get_block_t *get_block,
2680 : : struct writeback_control *wbc)
2681 : : {
2682 : 0 : struct inode * const inode = page->mapping->host;
2683 : : loff_t i_size = i_size_read(inode);
2684 : 0 : const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2685 : : unsigned offset;
2686 : : int ret;
2687 : :
2688 : : /* Is the page fully inside i_size? */
2689 [ # # ]: 0 : if (page->index < end_index)
2690 : : goto out;
2691 : :
2692 : : /* Is the page fully outside i_size? (truncate in progress) */
2693 : 0 : offset = i_size & (PAGE_CACHE_SIZE-1);
2694 [ # # ][ # # ]: 0 : if (page->index >= end_index+1 || !offset) {
2695 : : /*
2696 : : * The page may have dirty, unmapped buffers. For example,
2697 : : * they may have been added in ext3_writepage(). Make them
2698 : : * freeable here, so the page does not leak.
2699 : : */
2700 : : #if 0
2701 : : /* Not really sure about this - do we need this ? */
2702 : : if (page->mapping->a_ops->invalidatepage)
2703 : : page->mapping->a_ops->invalidatepage(page, offset);
2704 : : #endif
2705 : 0 : unlock_page(page);
2706 : 0 : return 0; /* don't care */
2707 : : }
2708 : :
2709 : : /*
2710 : : * The page straddles i_size. It must be zeroed out on each and every
2711 : : * writepage invocation because it may be mmapped. "A file is mapped
2712 : : * in multiples of the page size. For a file that is not a multiple of
2713 : : * the page size, the remaining memory is zeroed when mapped, and
2714 : : * writes to that region are not written out to the file."
2715 : : */
2716 : : zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2717 : : out:
2718 : 0 : ret = mpage_writepage(page, get_block, wbc);
2719 [ # # ]: 0 : if (ret == -EAGAIN)
2720 : 0 : ret = __block_write_full_page(inode, page, get_block, wbc,
2721 : : end_buffer_async_write);
2722 : 0 : return ret;
2723 : : }
2724 : : EXPORT_SYMBOL(nobh_writepage);
2725 : :
2726 : 0 : int nobh_truncate_page(struct address_space *mapping,
2727 : : loff_t from, get_block_t *get_block)
2728 : : {
2729 : 0 : pgoff_t index = from >> PAGE_CACHE_SHIFT;
2730 : 0 : unsigned offset = from & (PAGE_CACHE_SIZE-1);
2731 : : unsigned blocksize;
2732 : : sector_t iblock;
2733 : : unsigned length, pos;
2734 : 0 : struct inode *inode = mapping->host;
2735 : : struct page *page;
2736 : : struct buffer_head map_bh;
2737 : : int err;
2738 : :
2739 : 0 : blocksize = 1 << inode->i_blkbits;
2740 : 0 : length = offset & (blocksize - 1);
2741 : :
2742 : : /* Block boundary? Nothing to do */
2743 [ # # ]: 0 : if (!length)
2744 : : return 0;
2745 : :
2746 : 0 : length = blocksize - length;
2747 : 0 : iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2748 : :
2749 : : page = grab_cache_page(mapping, index);
2750 : : err = -ENOMEM;
2751 [ # # ]: 0 : if (!page)
2752 : : goto out;
2753 : :
2754 [ # # ]: 0 : if (page_has_buffers(page)) {
2755 : : has_buffers:
2756 : 0 : unlock_page(page);
2757 : 0 : page_cache_release(page);
2758 : 0 : return block_truncate_page(mapping, from, get_block);
2759 : : }
2760 : :
2761 : : /* Find the buffer that contains "offset" */
2762 : : pos = blocksize;
2763 [ # # ]: 0 : while (offset >= pos) {
2764 : 0 : iblock++;
2765 : 0 : pos += blocksize;
2766 : : }
2767 : :
2768 : 0 : map_bh.b_size = blocksize;
2769 : 0 : map_bh.b_state = 0;
2770 : 0 : err = get_block(inode, iblock, &map_bh, 0);
2771 [ # # ]: 0 : if (err)
2772 : : goto unlock;
2773 : : /* unmapped? It's a hole - nothing to do */
2774 [ # # ]: 0 : if (!buffer_mapped(&map_bh))
2775 : : goto unlock;
2776 : :
2777 : : /* Ok, it's mapped. Make sure it's up-to-date */
2778 [ # # ]: 0 : if (!PageUptodate(page)) {
2779 : 0 : err = mapping->a_ops->readpage(NULL, page);
2780 [ # # ]: 0 : if (err) {
2781 : 0 : page_cache_release(page);
2782 : 0 : goto out;
2783 : : }
2784 : : lock_page(page);
2785 [ # # ]: 0 : if (!PageUptodate(page)) {
2786 : : err = -EIO;
2787 : : goto unlock;
2788 : : }
2789 [ # # ]: 0 : if (page_has_buffers(page))
2790 : : goto has_buffers;
2791 : : }
2792 : : zero_user(page, offset, length);
2793 : 0 : set_page_dirty(page);
2794 : : err = 0;
2795 : :
2796 : : unlock:
2797 : 0 : unlock_page(page);
2798 : 0 : page_cache_release(page);
2799 : : out:
2800 : 0 : return err;
2801 : : }
2802 : : EXPORT_SYMBOL(nobh_truncate_page);
2803 : :
2804 : 0 : int block_truncate_page(struct address_space *mapping,
2805 : : loff_t from, get_block_t *get_block)
2806 : : {
2807 : 0 : pgoff_t index = from >> PAGE_CACHE_SHIFT;
2808 : 0 : unsigned offset = from & (PAGE_CACHE_SIZE-1);
2809 : : unsigned blocksize;
2810 : : sector_t iblock;
2811 : : unsigned length, pos;
2812 : 0 : struct inode *inode = mapping->host;
2813 : : struct page *page;
2814 : : struct buffer_head *bh;
2815 : : int err;
2816 : :
2817 : 0 : blocksize = 1 << inode->i_blkbits;
2818 : 0 : length = offset & (blocksize - 1);
2819 : :
2820 : : /* Block boundary? Nothing to do */
2821 [ # # ]: 0 : if (!length)
2822 : : return 0;
2823 : :
2824 : 0 : length = blocksize - length;
2825 : 0 : iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2826 : :
2827 : : page = grab_cache_page(mapping, index);
2828 : : err = -ENOMEM;
2829 [ # # ]: 0 : if (!page)
2830 : : goto out;
2831 : :
2832 [ # # ]: 0 : if (!page_has_buffers(page))
2833 : 0 : create_empty_buffers(page, blocksize, 0);
2834 : :
2835 : : /* Find the buffer that contains "offset" */
2836 [ # # ]: 0 : bh = page_buffers(page);
2837 : : pos = blocksize;
2838 [ # # ]: 0 : while (offset >= pos) {
2839 : 0 : bh = bh->b_this_page;
2840 : 0 : iblock++;
2841 : 0 : pos += blocksize;
2842 : : }
2843 : :
2844 : : err = 0;
2845 [ # # ]: 0 : if (!buffer_mapped(bh)) {
2846 [ # # ]: 0 : WARN_ON(bh->b_size != blocksize);
2847 : 0 : err = get_block(inode, iblock, bh, 0);
2848 [ # # ]: 0 : if (err)
2849 : : goto unlock;
2850 : : /* unmapped? It's a hole - nothing to do */
2851 [ # # ]: 0 : if (!buffer_mapped(bh))
2852 : : goto unlock;
2853 : : }
2854 : :
2855 : : /* Ok, it's mapped. Make sure it's up-to-date */
2856 [ # # ]: 0 : if (PageUptodate(page))
2857 : 0 : set_buffer_uptodate(bh);
2858 : :
2859 [ # # ][ # # ]: 0 : if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
[ # # ]
2860 : : err = -EIO;
2861 : 0 : ll_rw_block(READ, 1, &bh);
2862 : 0 : wait_on_buffer(bh);
2863 : : /* Uhhuh. Read error. Complain and punt. */
2864 [ # # ]: 0 : if (!buffer_uptodate(bh))
2865 : : goto unlock;
2866 : : }
2867 : :
2868 : : zero_user(page, offset, length);
2869 : 0 : mark_buffer_dirty(bh);
2870 : : err = 0;
2871 : :
2872 : : unlock:
2873 : 0 : unlock_page(page);
2874 : 0 : page_cache_release(page);
2875 : : out:
2876 : 0 : return err;
2877 : : }
2878 : : EXPORT_SYMBOL(block_truncate_page);
2879 : :
2880 : : /*
2881 : : * The generic ->writepage function for buffer-backed address_spaces
2882 : : * this form passes in the end_io handler used to finish the IO.
2883 : : */
2884 : 0 : int block_write_full_page_endio(struct page *page, get_block_t *get_block,
2885 : : struct writeback_control *wbc, bh_end_io_t *handler)
2886 : : {
2887 : 23984 : struct inode * const inode = page->mapping->host;
2888 : : loff_t i_size = i_size_read(inode);
2889 : 23984 : const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2890 : : unsigned offset;
2891 : :
2892 : : /* Is the page fully inside i_size? */
2893 [ + + ]: 23984 : if (page->index < end_index)
2894 : 23983 : return __block_write_full_page(inode, page, get_block, wbc,
2895 : : handler);
2896 : :
2897 : : /* Is the page fully outside i_size? (truncate in progress) */
2898 : 1 : offset = i_size & (PAGE_CACHE_SIZE-1);
2899 [ + - ][ - + ]: 1 : if (page->index >= end_index+1 || !offset) {
2900 : : /*
2901 : : * The page may have dirty, unmapped buffers. For example,
2902 : : * they may have been added in ext3_writepage(). Make them
2903 : : * freeable here, so the page does not leak.
2904 : : */
2905 : 0 : do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
2906 : 0 : unlock_page(page);
2907 : 0 : return 0; /* don't care */
2908 : : }
2909 : :
2910 : : /*
2911 : : * The page straddles i_size. It must be zeroed out on each and every
2912 : : * writepage invocation because it may be mmapped. "A file is mapped
2913 : : * in multiples of the page size. For a file that is not a multiple of
2914 : : * the page size, the remaining memory is zeroed when mapped, and
2915 : : * writes to that region are not written out to the file."
2916 : : */
2917 : : zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2918 : 1 : return __block_write_full_page(inode, page, get_block, wbc, handler);
2919 : : }
2920 : : EXPORT_SYMBOL(block_write_full_page_endio);
2921 : :
2922 : : /*
2923 : : * The generic ->writepage function for buffer-backed address_spaces
2924 : : */
2925 : 0 : int block_write_full_page(struct page *page, get_block_t *get_block,
2926 : : struct writeback_control *wbc)
2927 : : {
2928 : 23984 : return block_write_full_page_endio(page, get_block, wbc,
2929 : : end_buffer_async_write);
2930 : : }
2931 : : EXPORT_SYMBOL(block_write_full_page);
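The matching ->writepage wrapper is the same one-line shape, again assuming the hypothetical myfs_get_block():

static int myfs_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, myfs_get_block, wbc);
}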
2932 : :
2933 : 0 : sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2934 : : get_block_t *get_block)
2935 : : {
2936 : : struct buffer_head tmp;
2937 : 123440 : struct inode *inode = mapping->host;
2938 : 123440 : tmp.b_state = 0;
2939 : 123440 : tmp.b_blocknr = 0;
2940 : 123440 : tmp.b_size = 1 << inode->i_blkbits;
2941 : 123440 : get_block(inode, block, &tmp, 0);
2942 : 123440 : return tmp.b_blocknr;
2943 : : }
2944 : : EXPORT_SYMBOL(generic_block_bmap);
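Likewise, a FIBMAP-style ->bmap can be a thin wrapper around this helper (hypothetical names):

static sector_t myfs_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping, block, myfs_get_block);
}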
2945 : :
2946 : 0 : static void end_bio_bh_io_sync(struct bio *bio, int err)
2947 : : {
2948 : 118497 : struct buffer_head *bh = bio->bi_private;
2949 : :
2950 [ - + ]: 118497 : if (err == -EOPNOTSUPP) {
2951 : 0 : set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
2952 : : }
2953 : :
2954 [ - + ]: 118497 : if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
2955 : 0 : set_bit(BH_Quiet, &bh->b_state);
2956 : :
2957 : 118497 : bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
2958 : 118497 : bio_put(bio);
2959 : 118497 : }
2960 : :
2961 : : /*
2962 : : * This allows us to do IO even on the odd last sectors
2963 : : * of a device, even if the bh block size is some multiple
2964 : : * of the physical sector size.
2965 : : *
2966 : : * We'll just truncate the bio to the size of the device,
2967 : : * and clear the end of the buffer head manually.
2968 : : *
2969 : : * Truly out-of-range accesses will turn into actual IO
2970 : : * errors, this only handles the "we need to be able to
2971 : : * do IO at the final sector" case.
2972 : : */
2973 : 0 : static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
2974 : : {
2975 : : sector_t maxsector;
2976 : : unsigned bytes;
2977 : :
2978 : 118498 : maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
2979 [ + - ]: 118498 : if (!maxsector)
2980 : : return;
2981 : :
2982 : : /*
2983 : : * If the *whole* IO is past the end of the device,
2984 : : * let it through, and the IO layer will turn it into
2985 : : * an EIO.
2986 : : */
2987 [ + - ]: 118498 : if (unlikely(bio->bi_iter.bi_sector >= maxsector))
2988 : : return;
2989 : :
2990 : 118498 : maxsector -= bio->bi_iter.bi_sector;
2991 : 118498 : bytes = bio->bi_iter.bi_size;
2992 [ - + ]: 118498 : if (likely((bytes >> 9) <= maxsector))
2993 : : return;
2994 : :
2995 : : /* Uhhuh. We've got a bh that straddles the device size! */
2996 : 0 : bytes = maxsector << 9;
2997 : :
2998 : : /* Truncate the bio.. */
2999 : 0 : bio->bi_iter.bi_size = bytes;
3000 : 0 : bio->bi_io_vec[0].bv_len = bytes;
3001 : :
3002 : : /* ..and clear the end of the buffer for reads */
3003 [ # # ]: 0 : if ((rw & RW_MASK) == READ) {
3004 : 0 : void *kaddr = kmap_atomic(bh->b_page);
3005 [ # # ]: 0 : memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
3006 : 0 : kunmap_atomic(kaddr);
3007 : 0 : flush_dcache_page(bh->b_page);
3008 : : }
3009 : : }
3010 : :
3011 : 0 : int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
3012 : : {
3013 : : struct bio *bio;
3014 : : int ret = 0;
3015 : :
3016 [ - + ]: 118498 : BUG_ON(!buffer_locked(bh));
3017 [ - + ]: 118498 : BUG_ON(!buffer_mapped(bh));
3018 [ - + ]: 118498 : BUG_ON(!bh->b_end_io);
3019 [ - + ]: 118498 : BUG_ON(buffer_delay(bh));
3020 [ - + ]: 118498 : BUG_ON(buffer_unwritten(bh));
3021 : :
3022 : : /*
3023 : : * Only clear out a write error when rewriting
3024 : : */
3025 [ + + ][ + - ]: 118498 : if (test_set_buffer_req(bh) && (rw & WRITE))
3026 : : clear_buffer_write_io_error(bh);
3027 : :
3028 : : /*
3029 : : * from here on down, it's all bio -- do the initial mapping,
3030 : : * submit_bio -> generic_make_request may further map this bio around
3031 : : */
3032 : : bio = bio_alloc(GFP_NOIO, 1);
3033 : :
3034 : 118498 : bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
3035 : 118498 : bio->bi_bdev = bh->b_bdev;
3036 : 118498 : bio->bi_io_vec[0].bv_page = bh->b_page;
3037 : 118498 : bio->bi_io_vec[0].bv_len = bh->b_size;
3038 : 118498 : bio->bi_io_vec[0].bv_offset = bh_offset(bh);
3039 : :
3040 : 118498 : bio->bi_vcnt = 1;
3041 : 118498 : bio->bi_iter.bi_size = bh->b_size;
3042 : :
3043 : 118498 : bio->bi_end_io = end_bio_bh_io_sync;
3044 : 118498 : bio->bi_private = bh;
3045 : 118498 : bio->bi_flags |= bio_flags;
3046 : :
3047 : : /* Take care of bh's that straddle the end of the device */
3048 : 118498 : guard_bh_eod(rw, bio, bh);
3049 : :
3050 [ + + ]: 118498 : if (buffer_meta(bh))
3051 : 13595 : rw |= REQ_META;
3052 [ + + ]: 118498 : if (buffer_prio(bh))
3053 : 13595 : rw |= REQ_PRIO;
3054 : :
3055 : 0 : bio_get(bio);
3056 : 118498 : submit_bio(rw, bio);
3057 : :
3058 [ - + ]: 118497 : if (bio_flagged(bio, BIO_EOPNOTSUPP))
3059 : : ret = -EOPNOTSUPP;
3060 : :
3061 : 118497 : bio_put(bio);
3062 : 118498 : return ret;
3063 : : }
3064 : : EXPORT_SYMBOL_GPL(_submit_bh);
3065 : :
3066 : 0 : int submit_bh(int rw, struct buffer_head *bh)
3067 : : {
3068 : 118488 : return _submit_bh(rw, bh, 0);
3069 : : }
3070 : : EXPORT_SYMBOL(submit_bh);
3071 : :
3072 : : /**
3073 : : * ll_rw_block: low-level access to block devices (DEPRECATED)
3074 : : * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
3075 : : * @nr: number of &struct buffer_heads in the array
3076 : : * @bhs: array of pointers to &struct buffer_head
3077 : : *
3078 : : * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
3079 : : * requests an I/O operation on them, either a %READ or a %WRITE. The third
3080 : : * %READA option is described in the documentation for generic_make_request()
3081 : : * which ll_rw_block() calls.
3082 : : *
3083 : : * This function drops any buffer that it cannot get a lock on (with the
3084 : : * BH_Lock state bit), any buffer that appears to be clean when doing a write
3085 : : * request, and any buffer that appears to be up-to-date when doing read
3086 : :  * request. Further, it marks as clean the buffers that are processed for
3087 : : * writing (the buffer cache won't assume that they are actually clean
3088 : : * until the buffer gets unlocked).
3089 : : *
3090 : :  * ll_rw_block sets b_end_io to a simple completion handler that marks
3091 : :  * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
3092 : : * any waiters.
3093 : : *
3094 : : * All of the buffers must be for the same device, and must also be a
3095 : : * multiple of the current approved size for the device.
3096 : : */
3097 : 0 : void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
3098 : : {
3099 : : int i;
3100 : :
3101 [ + + ]: 1251637 : for (i = 0; i < nr; i++) {
3102 : 625862 : struct buffer_head *bh = bhs[i];
3103 : :
3104 [ + + ]: 625626 : if (!trylock_buffer(bh))
3105 : 207 : continue;
3106 [ - + ]: 625419 : if (rw == WRITE) {
3107 [ # # ]: 0 : if (test_clear_buffer_dirty(bh)) {
3108 : 0 : bh->b_end_io = end_buffer_write_sync;
3109 : : get_bh(bh);
3110 : : submit_bh(WRITE, bh);
3111 : 0 : continue;
3112 : : }
3113 : : } else {
3114 [ + + ]: 625419 : if (!buffer_uptodate(bh)) {
3115 : 7163 : bh->b_end_io = end_buffer_read_sync;
3116 : : get_bh(bh);
3117 : : submit_bh(rw, bh);
3118 : 7163 : continue;
3119 : : }
3120 : : }
3121 : 618256 : unlock_buffer(bh);
3122 : : }
3123 : 625775 : }
3124 : : EXPORT_SYMBOL(ll_rw_block);
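Although the interface is marked deprecated, the classic caller pattern is to kick off a read on a buffer that is not yet uptodate and then sleep on it. A sketch, assuming the buffer came from sb_getblk(); sb_bread() wraps this whole sequence for the single-buffer case:

	struct buffer_head *bh = sb_getblk(sb, blocknr);

	if (bh && !buffer_uptodate(bh)) {
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			brelse(bh);
			bh = NULL;	/* treat as an I/O error */
		}
	}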
3125 : :
3126 : 0 : void write_dirty_buffer(struct buffer_head *bh, int rw)
3127 : : {
3128 : : lock_buffer(bh);
3129 [ - + ]: 560 : if (!test_clear_buffer_dirty(bh)) {
3130 : 0 : unlock_buffer(bh);
3131 : 0 : return;
3132 : : }
3133 : 560 : bh->b_end_io = end_buffer_write_sync;
3134 : : get_bh(bh);
3135 : : submit_bh(rw, bh);
3136 : : }
3137 : : EXPORT_SYMBOL(write_dirty_buffer);
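write_dirty_buffer() is the fire-and-forget counterpart of the synchronous helpers below: it starts the write only if the buffer is actually dirty and returns without waiting. A hedged sketch of a typical caller (the helper name is hypothetical):

#include <linux/buffer_head.h>

/* Illustrative only: push a modified metadata block toward disk without waiting. */
static void my_flush_metadata(struct buffer_head *bh)
{
	mark_buffer_dirty(bh);			/* record our modification */
	write_dirty_buffer(bh, WRITE);		/* locks bh, clears dirty, submits the write */
	/* completion runs end_buffer_write_sync(), which unlocks the buffer */
}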
3138 : :
3139 : : /*
3140 : : * For a data-integrity writeout, we need to wait upon any in-progress I/O
3141 : : * and then start new I/O and then wait upon it. The caller must have a ref on
3142 : : * the buffer_head.
3143 : : */
3144 : 0 : int __sync_dirty_buffer(struct buffer_head *bh, int rw)
3145 : : {
3146 : : int ret = 0;
3147 : :
3148 [ - + ]: 59 : WARN_ON(atomic_read(&bh->b_count) < 1);
3149 : : lock_buffer(bh);
3150 [ + - ]: 59 : if (test_clear_buffer_dirty(bh)) {
3151 : : get_bh(bh);
3152 : 59 : bh->b_end_io = end_buffer_write_sync;
3153 : : ret = submit_bh(rw, bh);
3154 : : wait_on_buffer(bh);
3155 [ + - ][ - + ]: 59 : if (!ret && !buffer_uptodate(bh))
3156 : : ret = -EIO;
3157 : : } else {
3158 : 0 : unlock_buffer(bh);
3159 : : }
3160 : 59 : return ret;
3161 : : }
3162 : : EXPORT_SYMBOL(__sync_dirty_buffer);
3163 : :
3164 : 0 : int sync_dirty_buffer(struct buffer_head *bh)
3165 : : {
3166 : 58 : return __sync_dirty_buffer(bh, WRITE_SYNC);
3167 : : }
3168 : : EXPORT_SYMBOL(sync_dirty_buffer);
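The comment above is the contract for the data-integrity path: the caller must already hold a reference, and the helper waits both for any write already in flight (via lock_buffer) and for the write it issues itself. A hedged sketch of the common "modify, then force to disk" sequence; the wrapper name and the memcpy-based update are hypothetical:

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>

/* Illustrative only: update one on-disk block and wait until it is stable. */
static int my_update_block_sync(struct super_block *sb, sector_t block,
				const void *data, size_t len)
{
	struct buffer_head *bh = sb_bread(sb, block);	/* read; returns with a reference held */
	int err;

	if (!bh)
		return -EIO;
	lock_buffer(bh);
	memcpy(bh->b_data, data, min_t(size_t, len, bh->b_size));
	unlock_buffer(bh);
	mark_buffer_dirty(bh);
	err = sync_dirty_buffer(bh);		/* WRITE_SYNC and wait; -EIO if the write failed */
	brelse(bh);
	return err;
}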
3169 : :
3170 : : /*
3171 : : * try_to_free_buffers() checks if all the buffers on this particular page
3172 : : * are unused, and releases them if so.
3173 : : *
3174 : : * Exclusion against try_to_free_buffers may be obtained by either
3175 : : * locking the page or by holding its mapping's private_lock.
3176 : : *
3177 : : * If the page is dirty but all the buffers are clean then we need to
3178 : : * be sure to mark the page clean as well. This is because the page
3179 : : * may be against a block device, and a later reattachment of buffers
3180 : : * to a dirty page will set *all* buffers dirty, which would corrupt
3181 : : * filesystem data on the same device.
3182 : : *
3183 : : * The same applies to regular filesystem pages: if all the buffers are
3184 : : * clean then we set the page clean and proceed. To do that, we require
3185 : : * total exclusion from __set_page_dirty_buffers(). That is obtained with
3186 : : * private_lock.
3187 : : *
3188 : : * try_to_free_buffers() is non-blocking.
3189 : : */
3190 : : static inline int buffer_busy(struct buffer_head *bh)
3191 : : {
3192 : 3683554 : return atomic_read(&bh->b_count) |
3193 : 1841777 : (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
3194 : : }
3195 : :
3196 : : static int
3197 : 0 : drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
3198 : : {
3199 [ - + ]: 1840448 : struct buffer_head *head = page_buffers(page);
3200 : : struct buffer_head *bh;
3201 : :
3202 : : bh = head;
3203 : : do {
3204 [ - + ][ # # ]: 1841749 : if (buffer_write_io_error(bh) && page->mapping)
3205 : 0 : set_bit(AS_EIO, &page->mapping->flags);
3206 [ + + ]: 1841777 : if (buffer_busy(bh))
3207 : : goto failed;
3208 : 1825094 : bh = bh->b_this_page;
3209 [ + + ]: 1825094 : } while (bh != head);
3210 : :
3211 : : do {
3212 : 1825125 : struct buffer_head *next = bh->b_this_page;
3213 : :
3214 [ - + ]: 1825125 : if (bh->b_assoc_map)
3215 : 0 : __remove_assoc_queue(bh);
3216 : : bh = next;
3217 [ + + ]: 1825196 : } while (bh != head);
3218 : 1823864 : *buffers_to_free = head;
3219 : 1823864 : __clear_page_buffers(page);
3220 : 1823793 : return 1;
3221 : : failed:
3222 : : return 0;
3223 : : }
3224 : :
3225 : 0 : int try_to_free_buffers(struct page *page)
3226 : : {
3227 : 1840496 : struct address_space * const mapping = page->mapping;
3228 : 1840496 : struct buffer_head *buffers_to_free = NULL;
3229 : : int ret = 0;
3230 : :
3231 [ - + ]: 1840496 : BUG_ON(!PageLocked(page));
3232 [ + ]: 1840496 : if (PageWriteback(page))
3233 : : return 0;
3234 : :
3235 [ + + ]: 1840526 : if (mapping == NULL) { /* can this still happen? */
3236 : 48 : ret = drop_buffers(page, &buffers_to_free);
3237 : 48 : goto out;
3238 : : }
3239 : :
3240 : : spin_lock(&mapping->private_lock);
3241 : 1840452 : ret = drop_buffers(page, &buffers_to_free);
3242 : :
3243 : : /*
3244 : : * If the filesystem writes its buffers by hand (e.g. ext3)
3245 : : * then we can have clean buffers against a dirty page. We
3246 : : * clean the page here; otherwise the VM will never notice
3247 : : * that the filesystem did any IO at all.
3248 : : *
3249 : : * Also, during truncate, discard_buffer will have marked all
3250 : : * the page's buffers clean. We discover that here and clean
3251 : : * the page also.
3252 : : *
3253 : : * private_lock must be held over this entire operation in order
3254 : : * to synchronise against __set_page_dirty_buffers and prevent the
3255 : : * dirty bit from being lost.
3256 : : */
3257 [ + + ]: 1840432 : if (ret)
3258 : 1823751 : cancel_dirty_page(page, PAGE_CACHE_SIZE);
3259 : : spin_unlock(&mapping->private_lock);
3260 : : out:
3261 [ + + ]: 3681121 : if (buffers_to_free) {
3262 : : struct buffer_head *bh = buffers_to_free;
3263 : :
3264 : : do {
3265 : 1825250 : struct buffer_head *next = bh->b_this_page;
3266 : 1825250 : free_buffer_head(bh);
3267 : : bh = next;
3268 [ + + ]: 1825184 : } while (bh != buffers_to_free);
3269 : : }
3270 : 1840559 : return ret;
3271 : : }
3272 : : EXPORT_SYMBOL(try_to_free_buffers);
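The exclusion rules spelled out above (page lock or mapping->private_lock) are exactly what the ->releasepage address-space hook already provides, which is why a simple filesystem can wire try_to_free_buffers() straight into it. A hedged sketch for a hypothetical filesystem with no private journalling state:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/buffer_head.h>

/*
 * Illustrative only: the VM calls ->releasepage with the page locked,
 * which satisfies the exclusion try_to_free_buffers() requires.
 */
static int myfs_releasepage(struct page *page, gfp_t gfp)
{
	return try_to_free_buffers(page);	/* 1 if all buffers were clean and got freed */
}

Such a hook would be plugged into the filesystem's struct address_space_operations as .releasepage = myfs_releasepage; filesystems that track extra per-buffer state (journals, ordered data) need their own checks before letting the buffers go.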
3273 : :
3274 : : /*
3275 : : * There are no bdflush tunables left. But distributions are
3276 : : * still running obsolete flush daemons, so we terminate them here.
3277 : : *
3278 : : * Use of bdflush() is deprecated and will be removed in a future kernel.
3279 : : * The `flush-X' kernel threads fully replace bdflush daemons and this call.
3280 : : */
3281 : 0 : SYSCALL_DEFINE2(bdflush, int, func, long, data)
3282 : : {
3283 : : static int msg_count;
3284 : :
3285 [ + - ]: 1 : if (!capable(CAP_SYS_ADMIN))
3286 : : return -EPERM;
3287 : :
3288 [ + - ]: 1 : if (msg_count < 5) {
3289 : 1 : msg_count++;
3290 : 1 : printk(KERN_INFO
3291 : : "warning: process `%s' used the obsolete bdflush"
3292 : 1 : " system call\n", current->comm);
3293 : 1 : printk(KERN_INFO "Fix your initscripts?\n");
3294 : : }
3295 : :
3296 [ - + ]: 1 : if (func == 1)
3297 : 0 : do_exit(0);
3298 : : return 0;
3299 : : }
3300 : :
3301 : : /*
3302 : : * Buffer-head allocation
3303 : : */
3304 : : static struct kmem_cache *bh_cachep __read_mostly;
3305 : :
3306 : : /*
3307 : : * Once the number of bh's in the machine exceeds this level, we start
3308 : : * stripping them in writeback.
3309 : : */
3310 : : static unsigned long max_buffer_heads;
3311 : :
3312 : : int buffer_heads_over_limit;
3313 : :
3314 : : struct bh_accounting {
3315 : : int nr; /* Number of live bh's */
3316 : : int ratelimit; /* Limit cacheline bouncing */
3317 : : };
3318 : :
3319 : : static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
3320 : :
3321 : 0 : static void recalc_bh_state(void)
3322 : : {
3323 : : int i;
3324 : : int tot = 0;
3325 : :
3326 [ + + ]: 3759148 : if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
3327 : 3759148 : return;
3328 : 1834 : __this_cpu_write(bh_accounting.ratelimit, 0);
3329 [ + + ]: 3765567 : for_each_online_cpu(i)
3330 : 4585 : tot += per_cpu(bh_accounting, i).nr;
3331 : 917 : buffer_heads_over_limit = (tot > max_buffer_heads);
3332 : : }
3333 : :
3334 : 0 : struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3335 : : {
3336 : 1880290 : struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
3337 [ + ]: 1882967 : if (ret) {
3338 : 1882978 : INIT_LIST_HEAD(&ret->b_assoc_buffers);
3339 : 1882978 : preempt_disable();
3340 : 3765910 : __this_cpu_inc(bh_accounting.nr);
3341 : 1882955 : recalc_bh_state();
3342 : 1882497 : preempt_enable();
3343 : : }
3344 : 2240 : return ret;
3345 : : }
3346 : : EXPORT_SYMBOL(alloc_buffer_head);
3347 : :
3348 : 0 : void free_buffer_head(struct buffer_head *bh)
3349 : : {
3350 [ - + ]: 1876938 : BUG_ON(!list_empty(&bh->b_assoc_buffers));
3351 : 1876938 : kmem_cache_free(bh_cachep, bh);
3352 : 1876966 : preempt_disable();
3353 : 3753940 : __this_cpu_dec(bh_accounting.nr);
3354 : 1876970 : recalc_bh_state();
3355 : 1876919 : preempt_enable();
3356 : 1876924 : }
3357 : : EXPORT_SYMBOL(free_buffer_head);
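alloc_buffer_head() and free_buffer_head() are the low-level allocation pair behind the per-CPU accounting above; higher-level code normally builds a whole chain of heads for one page and tears it down the same way. A hedged, simplified sketch of that pattern, loosely modelled on what alloc_page_buffers() does (the helper name is hypothetical, and the heads are not actually attached to the page here):

#include <linux/mm.h>
#include <linux/buffer_head.h>

/* Illustrative only: allocate one buffer_head per block of a page, newest first. */
static struct buffer_head *my_alloc_bh_chain(struct page *page, unsigned size)
{
	struct buffer_head *head = NULL, *bh;
	long offset = PAGE_SIZE;

	while ((offset -= size) >= 0) {
		bh = alloc_buffer_head(GFP_NOFS);	/* bumps per-CPU bh_accounting.nr */
		if (!bh)
			goto fail;
		bh->b_this_page = head;			/* link the new head in front */
		bh->b_size = size;
		bh->b_page = page;			/* not yet attached to the page's private data */
		head = bh;
	}
	return head;

fail:
	while (head) {
		bh = head;
		head = head->b_this_page;
		free_buffer_head(bh);			/* drops the per-CPU count again */
	}
	return NULL;
}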
3358 : :
3359 : 0 : static void buffer_exit_cpu(int cpu)
3360 : : {
3361 : : int i;
3362 : 78 : struct bh_lru *b = &per_cpu(bh_lrus, cpu);
3363 : :
3364 [ + + ]: 702 : for (i = 0; i < BH_LRU_SIZE; i++) {
3365 : 624 : brelse(b->bhs[i]);
3366 : 624 : b->bhs[i] = NULL;
3367 : : }
3368 : 156 : this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
3369 : 78 : per_cpu(bh_accounting, cpu).nr = 0;
3370 : 78 : }
3371 : :
3372 : 0 : static int buffer_cpu_notify(struct notifier_block *self,
3373 : : unsigned long action, void *hcpu)
3374 : : {
3375 [ + + ]: 555 : if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
3376 : 78 : buffer_exit_cpu((unsigned long)hcpu);
3377 : 0 : return NOTIFY_OK;
3378 : : }
3379 : :
3380 : : /**
3381 : : * bh_uptodate_or_lock - Test whether the buffer is uptodate
3382 : : * @bh: struct buffer_head
3383 : : *
3384 : : * Returns 1 if the buffer is up-to-date; otherwise returns 0 with the
3385 : : * buffer left locked so the caller can issue the read itself.
3386 : : */
3387 : 0 : int bh_uptodate_or_lock(struct buffer_head *bh)
3388 : : {
3389 [ + + ]: 417905 : if (!buffer_uptodate(bh)) {
3390 : : lock_buffer(bh);
3391 [ - + ]: 18 : if (!buffer_uptodate(bh))
3392 : : return 0;
3393 : 0 : unlock_buffer(bh);
3394 : : }
3395 : : return 1;
3396 : : }
3397 : : EXPORT_SYMBOL(bh_uptodate_or_lock);
3398 : :
3399 : : /**
3400 : : * bh_submit_read - Submit a locked buffer for reading
3401 : : * @bh: struct buffer_head
3402 : : *
3403 : : * Returns zero on success and -EIO on error.
3404 : : */
3405 : 0 : int bh_submit_read(struct buffer_head *bh)
3406 : : {
3407 [ - + ]: 17 : BUG_ON(!buffer_locked(bh));
3408 : :
3409 [ - + ]: 17 : if (buffer_uptodate(bh)) {
3410 : 0 : unlock_buffer(bh);
3411 : 0 : return 0;
3412 : : }
3413 : :
3414 : : get_bh(bh);
3415 : 17 : bh->b_end_io = end_buffer_read_sync;
3416 : : submit_bh(READ, bh);
3417 : : wait_on_buffer(bh);
3418 [ - + ]: 34 : if (buffer_uptodate(bh))
3419 : : return 0;
3420 : 0 : return -EIO;
3421 : : }
3422 : : EXPORT_SYMBOL(bh_submit_read);
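bh_uptodate_or_lock() and bh_submit_read() are designed as a pair: the first either reports that the data is already valid or hands the buffer back locked, and the second turns that locked buffer into a synchronous read. A hedged sketch of the combined read path (the wrapper name is hypothetical):

#include <linux/fs.h>
#include <linux/buffer_head.h>

/* Illustrative only: read one block, sleeping until it is up to date. */
static struct buffer_head *my_read_block(struct super_block *sb, sector_t block)
{
	struct buffer_head *bh = sb_getblk(sb, block);

	if (!bh)
		return NULL;
	if (bh_uptodate_or_lock(bh))
		return bh;			/* already valid, no I/O needed */
	if (bh_submit_read(bh) < 0) {		/* submits READ on the locked bh and waits */
		brelse(bh);
		return NULL;
	}
	return bh;				/* caller must brelse() when finished */
}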
3423 : :
3424 : 0 : void __init buffer_init(void)
3425 : : {
3426 : : unsigned long nrpages;
3427 : :
3428 : 0 : bh_cachep = kmem_cache_create("buffer_head",
3429 : : sizeof(struct buffer_head), 0,
3430 : : (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
3431 : : SLAB_MEM_SPREAD),
3432 : : NULL);
3433 : :
3434 : : /*
3435 : : * Limit the bh occupancy to 10% of ZONE_NORMAL
3436 : : */
3437 : 0 : nrpages = (nr_free_buffer_pages() * 10) / 100;
3438 : 0 : max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
3439 : 0 : hotcpu_notifier(buffer_cpu_notify, 0);
3440 : 0 : }
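The 10% cap translates into a concrete buffer-head budget. A worked example under stated assumptions (the figures are illustrative, not measured): with 4 KiB pages and a struct buffer_head of roughly 100 bytes on a 64-bit build, PAGE_SIZE / sizeof(struct buffer_head) is about 40, so a machine with 1,000,000 free buffer pages gets nrpages = 100,000 and max_buffer_heads ≈ 100,000 × 40 = 4,000,000 before buffer_heads_over_limit trips and writeback starts stripping buffers.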