Branch data Line data Source code
1 : : /*
2 : : * linux/fs/buffer.c
3 : : *
4 : : * Copyright (C) 1991, 1992, 2002 Linus Torvalds
5 : : */
6 : :
7 : : /*
8 : : * Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95
9 : : *
10 : : * Removed a lot of unnecessary code and simplified things now that
11 : : * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
12 : : *
13 : : * Speed up hash, lru, and free list operations. Use gfp() for allocating
14 : : * hash table, use SLAB cache for buffer heads. SMP threading. -DaveM
15 : : *
16 : : * Added 32k buffer block sizes - these are required for older ARM systems. - RMK
17 : : *
18 : : * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de>
19 : : */
20 : :
21 : : #include <linux/kernel.h>
22 : : #include <linux/syscalls.h>
23 : : #include <linux/fs.h>
24 : : #include <linux/mm.h>
25 : : #include <linux/percpu.h>
26 : : #include <linux/slab.h>
27 : : #include <linux/capability.h>
28 : : #include <linux/blkdev.h>
29 : : #include <linux/file.h>
30 : : #include <linux/quotaops.h>
31 : : #include <linux/highmem.h>
32 : : #include <linux/export.h>
33 : : #include <linux/writeback.h>
34 : : #include <linux/hash.h>
35 : : #include <linux/suspend.h>
36 : : #include <linux/buffer_head.h>
37 : : #include <linux/task_io_accounting_ops.h>
38 : : #include <linux/bio.h>
39 : : #include <linux/notifier.h>
40 : : #include <linux/cpu.h>
41 : : #include <linux/bitops.h>
42 : : #include <linux/mpage.h>
43 : : #include <linux/bit_spinlock.h>
44 : : #include <trace/events/block.h>
45 : :
46 : : static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
47 : :
48 : : #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
49 : :
50 : 0 : void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
51 : : {
52 : 59594 : bh->b_end_io = handler;
53 : 59594 : bh->b_private = private;
54 : 0 : }
55 : : EXPORT_SYMBOL(init_buffer);
56 : :
57 : 0 : inline void touch_buffer(struct buffer_head *bh)
58 : : {
59 : : trace_block_touch_buffer(bh);
60 : 14354989 : mark_page_accessed(bh->b_page);
61 : 0 : }
62 : : EXPORT_SYMBOL(touch_buffer);
63 : :
64 : 0 : static int sleep_on_buffer(void *word)
65 : : {
66 : 43784 : io_schedule();
67 : 43784 : return 0;
68 : : }
69 : :
70 : 0 : void __lock_buffer(struct buffer_head *bh)
71 : : {
72 : 4990 : wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
73 : : TASK_UNINTERRUPTIBLE);
74 : 1 : }
75 : : EXPORT_SYMBOL(__lock_buffer);
76 : :
77 : 0 : void unlock_buffer(struct buffer_head *bh)
78 : : {
79 : 19032298 : clear_bit_unlock(BH_Lock, &bh->b_state);
80 : 19032982 : smp_mb__after_clear_bit();
81 : 19032671 : wake_up_bit(&bh->b_state, BH_Lock);
82 : 19032215 : }
83 : : EXPORT_SYMBOL(unlock_buffer);
84 : :
85 : : /*
86 : : * Returns whether the page has dirty or writeback buffers. If all the buffers
87 : : * are unlocked and clean then the PageDirty information is stale. If
88 : : * any of the buffers are locked, it is assumed they are locked for IO.
89 : : */
90 : 0 : void buffer_check_dirty_writeback(struct page *page,
91 : : bool *dirty, bool *writeback)
92 : : {
93 : : struct buffer_head *head, *bh;
94 : 52196 : *dirty = false;
95 : 52196 : *writeback = false;
96 : :
97 [ - + ]: 52196 : BUG_ON(!PageLocked(page));
98 : :
99 [ + ]: 52196 : if (!page_has_buffers(page))
100 : 0 : return;
101 : :
102 [ + + ]: 52215 : if (PageWriteback(page))
103 : 104 : *writeback = true;
104 : :
105 : : head = page_buffers(page);
106 : : bh = head;
107 : : do {
108 [ + + ]: 52215 : if (buffer_locked(bh))
109 : 105 : *writeback = true;
110 : :
111 [ + + ]: 52215 : if (buffer_dirty(bh))
112 : 37 : *dirty = true;
113 : :
114 : 52215 : bh = bh->b_this_page;
115 [ - + ]: 52215 : } while (bh != head);
116 : : }
117 : : EXPORT_SYMBOL(buffer_check_dirty_writeback);
118 : :
119 : : /*
120 : : * Block until a buffer comes unlocked. This doesn't stop it
121 : : * from becoming locked again - you have to lock it yourself
122 : : * if you want to preserve its state.
123 : : */
124 : 0 : void __wait_on_buffer(struct buffer_head * bh)
125 : : {
126 : 42037 : wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
127 : 0 : }
128 : : EXPORT_SYMBOL(__wait_on_buffer);
129 : :
130 : : static void
131 : 0 : __clear_page_buffers(struct page *page)
132 : : {
133 : : ClearPagePrivate(page);
134 : 1913330 : set_page_private(page, 0);
135 : 1913330 : page_cache_release(page);
136 : 1913326 : }
137 : :
138 : :
139 : 0 : static int quiet_error(struct buffer_head *bh)
140 : : {
141 [ # # ][ # # ]: 0 : if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
142 : : return 0;
143 : : return 1;
144 : : }
145 : :
146 : :
147 : 0 : static void buffer_io_error(struct buffer_head *bh)
148 : : {
149 : : char b[BDEVNAME_SIZE];
150 : 0 : printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
151 : : bdevname(bh->b_bdev, b),
152 : : (unsigned long long)bh->b_blocknr);
153 : 0 : }
154 : :
155 : : /*
156 : : * End-of-IO handler helper function which does not touch the bh after
157 : : * unlocking it.
158 : : * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
159 : : * a race there is benign: unlock_buffer() only uses the bh's address for
160 : : * hashing after unlocking the buffer, so it doesn't actually touch the bh
161 : : * itself.
162 : : */
163 : 0 : static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
164 : : {
165 [ + - ]: 5255 : if (uptodate) {
166 : : set_buffer_uptodate(bh);
167 : : } else {
168 : : /* This happens, due to failed READA attempts. */
169 : : clear_buffer_uptodate(bh);
170 : : }
171 : 5255 : unlock_buffer(bh);
172 : 5255 : }
173 : :
174 : : /*
175 : : * Default synchronous end-of-IO handler.. Just mark it up-to-date and
176 : : * unlock the buffer. This is what ll_rw_block uses too.
177 : : */
178 : 0 : void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
179 : : {
180 : 5255 : __end_buffer_read_notouch(bh, uptodate);
181 : : put_bh(bh);
182 : 5255 : }
183 : : EXPORT_SYMBOL(end_buffer_read_sync);
184 : :
185 : 0 : void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
186 : : {
187 : : char b[BDEVNAME_SIZE];
188 : :
189 [ + - ]: 623 : if (uptodate) {
190 : : set_buffer_uptodate(bh);
191 : : } else {
192 [ # # ]: 0 : if (!quiet_error(bh)) {
193 : 0 : buffer_io_error(bh);
194 : 0 : printk(KERN_WARNING "lost page write due to "
195 : : "I/O error on %s\n",
196 : : bdevname(bh->b_bdev, b));
197 : : }
198 : : set_buffer_write_io_error(bh);
199 : : clear_buffer_uptodate(bh);
200 : : }
201 : 623 : unlock_buffer(bh);
202 : : put_bh(bh);
203 : 623 : }
204 : : EXPORT_SYMBOL(end_buffer_write_sync);
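/*
 * Illustrative sketch only (not part of this file): roughly how a caller
 * drives a synchronous write of one dirty buffer through
 * end_buffer_write_sync().  The name example_sync_write() is hypothetical;
 * the pattern mirrors what __sync_dirty_buffer() does later in buffer.c.
 */
static int example_sync_write(struct buffer_head *bh)
{
	int ret = 0;

	lock_buffer(bh);
	if (test_clear_buffer_dirty(bh)) {
		get_bh(bh);		/* extra ref; end_buffer_write_sync() drops it */
		bh->b_end_io = end_buffer_write_sync;
		ret = submit_bh(WRITE, bh);
		wait_on_buffer(bh);
		if (!ret && !buffer_uptodate(bh))
			ret = -EIO;
	} else {
		unlock_buffer(bh);	/* nothing to write */
	}
	return ret;
}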
205 : :
206 : : /*
207 : : * Various filesystems appear to want __find_get_block to be non-blocking.
208 : : * But it's the page lock which protects the buffers. To get around this,
209 : : * we get exclusion from try_to_free_buffers with the blockdev mapping's
210 : : * private_lock.
211 : : *
212 : : * Hack idea: for the blockdev mapping, i_bufferlist_lock contention
213 : : * may be quite high. This code could TryLock the page, and if that
214 : : * succeeds, there is no need to take private_lock. (But if
215 : : * private_lock is contended then so is mapping->tree_lock).
216 : : */
217 : : static struct buffer_head *
218 : 0 : __find_get_block_slow(struct block_device *bdev, sector_t block)
219 : : {
220 : 3058301 : struct inode *bd_inode = bdev->bd_inode;
221 : 3058301 : struct address_space *bd_mapping = bd_inode->i_mapping;
222 : : struct buffer_head *ret = NULL;
223 : : pgoff_t index;
224 : : struct buffer_head *bh;
225 : : struct buffer_head *head;
226 : : struct page *page;
227 : : int all_mapped = 1;
228 : :
229 : 3058301 : index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
230 : 3058301 : page = find_get_page(bd_mapping, index);
231 [ + + ]: 3058354 : if (!page)
232 : : goto out;
233 : :
234 : : spin_lock(&bd_mapping->private_lock);
235 [ + ]: 306084 : if (!page_has_buffers(page))
236 : : goto out_unlock;
237 [ - + ]: 3364382 : head = page_buffers(page);
238 : : bh = head;
239 : : do {
240 [ + - ]: 306081 : if (!buffer_mapped(bh))
241 : : all_mapped = 0;
242 [ + - ]: 306081 : else if (bh->b_blocknr == block) {
243 : : ret = bh;
244 : : get_bh(bh);
245 : : goto out_unlock;
246 : : }
247 : 0 : bh = bh->b_this_page;
248 [ # # ]: 0 : } while (bh != head);
249 : :
250 : : /* we might be here because some of the buffers on this page are
251 : : * not mapped. This is due to various races between
252 : : * file io on the block device and getblk. It gets dealt with
253 : : * elsewhere; don't buffer_error if we had some unmapped buffers.
254 : : */
255 [ # # ]: 0 : if (all_mapped) {
256 : : char b[BDEVNAME_SIZE];
257 : :
258 : 0 : printk("__find_get_block_slow() failed. "
259 : : "block=%llu, b_blocknr=%llu\n",
260 : : (unsigned long long)block,
261 : : (unsigned long long)bh->b_blocknr);
262 : 0 : printk("b_state=0x%08lx, b_size=%zu\n",
263 : : bh->b_state, bh->b_size);
264 : 0 : printk("device %s blocksize: %d\n", bdevname(bdev, b),
265 : 0 : 1 << bd_inode->i_blkbits);
266 : : }
267 : : out_unlock:
268 : : spin_unlock(&bd_mapping->private_lock);
269 : 306084 : page_cache_release(page);
270 : : out:
271 : 3058360 : return ret;
272 : : }
273 : :
274 : : /*
275 : : * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
276 : : */
277 : 0 : static void free_more_memory(void)
278 : : {
279 : : struct zone *zone;
280 : : int nid;
281 : :
282 : 0 : wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
283 : 0 : yield();
284 : :
285 [ # # ]: 0 : for_each_online_node(nid) {
286 : : (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
287 : : gfp_zone(GFP_NOFS), NULL,
288 : : &zone);
289 [ # # ]: 0 : if (zone)
290 : 0 : try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
291 : : GFP_NOFS, NULL);
292 : : }
293 : 0 : }
294 : :
295 : : /*
296 : : * I/O completion handler for block_read_full_page() - pages
297 : : * which come unlocked at the end of I/O.
298 : : */
299 : 0 : static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
300 : : {
301 : : unsigned long flags;
302 : : struct buffer_head *first;
303 : : struct buffer_head *tmp;
304 : : struct page *page;
305 : : int page_uptodate = 1;
306 : :
307 [ - + ]: 668 : BUG_ON(!buffer_async_read(bh));
308 : :
309 : 668 : page = bh->b_page;
310 [ + - ]: 668 : if (uptodate) {
311 : : set_buffer_uptodate(bh);
312 : : } else {
313 : : clear_buffer_uptodate(bh);
314 [ # # ]: 0 : if (!quiet_error(bh))
315 : 0 : buffer_io_error(bh);
316 : : SetPageError(page);
317 : : }
318 : :
319 : : /*
320 : : * Be _very_ careful from here on. Bad things can happen if
321 : : * two buffer heads end IO at almost the same time and both
322 : : * decide that the page is now completely done.
323 : : */
324 [ - + ]: 668 : first = page_buffers(page);
325 : : local_irq_save(flags);
326 : 668 : bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
327 : : clear_buffer_async_read(bh);
328 : 668 : unlock_buffer(bh);
329 : : tmp = bh;
330 : : do {
331 [ - + ]: 1336 : if (!buffer_uptodate(tmp))
332 : : page_uptodate = 0;
333 [ - + ]: 668 : if (buffer_async_read(tmp)) {
334 [ # # ]: 0 : BUG_ON(!buffer_locked(tmp));
335 : : goto still_busy;
336 : : }
337 : 668 : tmp = tmp->b_this_page;
338 [ - + ]: 668 : } while (tmp != bh);
339 : : bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
340 [ - + ]: 668 : local_irq_restore(flags);
341 : :
342 : : /*
343 : : * If none of the buffers had errors and they are all
344 : : * uptodate then we can set the page uptodate.
345 : : */
346 [ + - ][ + - ]: 668 : if (page_uptodate && !PageError(page))
347 : : SetPageUptodate(page);
348 : 668 : unlock_page(page);
349 : 668 : return;
350 : :
351 : : still_busy:
352 : : bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
353 [ # # ]: 0 : local_irq_restore(flags);
354 : : return;
355 : : }
356 : :
357 : : /*
358 : : * Completion handler for block_write_full_page() - pages which are unlocked
359 : : * during I/O, and which have PageWriteback cleared upon I/O completion.
360 : : */
361 : 0 : void end_buffer_async_write(struct buffer_head *bh, int uptodate)
362 : : {
363 : : char b[BDEVNAME_SIZE];
364 : : unsigned long flags;
365 : : struct buffer_head *first;
366 : : struct buffer_head *tmp;
367 : : struct page *page;
368 : :
369 [ - + ]: 19852 : BUG_ON(!buffer_async_write(bh));
370 : :
371 : 19852 : page = bh->b_page;
372 [ + - ]: 19852 : if (uptodate) {
373 : : set_buffer_uptodate(bh);
374 : : } else {
375 [ # # ]: 0 : if (!quiet_error(bh)) {
376 : 0 : buffer_io_error(bh);
377 : 0 : printk(KERN_WARNING "lost page write due to "
378 : : "I/O error on %s\n",
379 : : bdevname(bh->b_bdev, b));
380 : : }
381 : 0 : set_bit(AS_EIO, &page->mapping->flags);
382 : : set_buffer_write_io_error(bh);
383 : : clear_buffer_uptodate(bh);
384 : : SetPageError(page);
385 : : }
386 : :
387 [ - + ]: 19852 : first = page_buffers(page);
388 : : local_irq_save(flags);
389 : 19852 : bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
390 : :
391 : : clear_buffer_async_write(bh);
392 : 19852 : unlock_buffer(bh);
393 : 19852 : tmp = bh->b_this_page;
394 [ - + ]: 19852 : while (tmp != bh) {
395 [ # # ]: 0 : if (buffer_async_write(tmp)) {
396 [ # # ]: 0 : BUG_ON(!buffer_locked(tmp));
397 : : goto still_busy;
398 : : }
399 : 0 : tmp = tmp->b_this_page;
400 : : }
401 : : bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
402 [ - + ]: 19852 : local_irq_restore(flags);
403 : 19852 : end_page_writeback(page);
404 : 19852 : return;
405 : :
406 : : still_busy:
407 : : bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
408 [ # # ]: 0 : local_irq_restore(flags);
409 : : return;
410 : : }
411 : : EXPORT_SYMBOL(end_buffer_async_write);
412 : :
413 : : /*
414 : : * If a page's buffers are under async read-in (end_buffer_async_read
415 : : * completion) then there is a possibility that another thread of
416 : : * control could lock one of the buffers after it has completed
417 : : * but while some of the other buffers have not completed. This
418 : : * locked buffer would confuse end_buffer_async_read() into not unlocking
419 : : * the page. So the absence of BH_Async_Read tells end_buffer_async_read()
420 : : * that this buffer is not under async I/O.
421 : : *
422 : : * The page comes unlocked when it has no locked buffer_async buffers
423 : : * left.
424 : : *
425 : : * PageLocked prevents anyone from starting new async I/O reads against
426 : : * any of the buffers.
427 : : *
428 : : * PageWriteback is used to prevent simultaneous writeout of the same
429 : : * page.
430 : : *
431 : : * PageLocked prevents anyone from starting writeback of a page which is
432 : : * under read I/O (PageWriteback is only ever set against a locked page).
433 : : */
434 : : static void mark_buffer_async_read(struct buffer_head *bh)
435 : : {
436 : 668 : bh->b_end_io = end_buffer_async_read;
437 : : set_buffer_async_read(bh);
438 : : }
439 : :
440 : : static void mark_buffer_async_write_endio(struct buffer_head *bh,
441 : : bh_end_io_t *handler)
442 : : {
443 : 19852 : bh->b_end_io = handler;
444 : : set_buffer_async_write(bh);
445 : : }
446 : :
447 : 0 : void mark_buffer_async_write(struct buffer_head *bh)
448 : : {
449 : : mark_buffer_async_write_endio(bh, end_buffer_async_write);
450 : 0 : }
451 : : EXPORT_SYMBOL(mark_buffer_async_write);
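/*
 * Illustrative sketch only (not part of this file): a condensed view of the
 * two-stage submission that block_read_full_page() (later in this file)
 * performs, to make the async-read protocol above concrete.  The helper name
 * and the caller-supplied arr[]/nr are placeholders.
 */
static void example_submit_async_reads(struct buffer_head **arr, int nr)
{
	int i;

	/* Stage 1: with the page still locked, lock and mark every buffer we
	 * intend to read, so end_buffer_async_read() can tell when the last
	 * one completes. */
	for (i = 0; i < nr; i++) {
		lock_buffer(arr[i]);
		mark_buffer_async_read(arr[i]);
	}

	/* Stage 2: start the I/O.  Buffers that are already uptodate are
	 * completed by hand; the completion handler unlocks the page once no
	 * async-read buffer on it remains locked. */
	for (i = 0; i < nr; i++) {
		if (buffer_uptodate(arr[i]))
			end_buffer_async_read(arr[i], 1);
		else
			submit_bh(READ, arr[i]);
	}
}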
452 : :
453 : :
454 : : /*
455 : : * fs/buffer.c contains helper functions for buffer-backed address space's
456 : : * fsync functions. A common requirement for buffer-based filesystems is
457 : : * that certain data from the backing blockdev needs to be written out for
458 : : * a successful fsync(). For example, ext2 indirect blocks need to be
459 : : * written back and waited upon before fsync() returns.
460 : : *
461 : : * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(),
462 : : * inode_has_buffers() and invalidate_inode_buffers() are provided for the
463 : : * management of a list of dependent buffers at ->i_mapping->private_list.
464 : : *
465 : : * Locking is a little subtle: try_to_free_buffers() will remove buffers
466 : : * from their controlling inode's queue when they are being freed. But
467 : : * try_to_free_buffers() will be operating against the *blockdev* mapping
468 : : * at the time, not against the S_ISREG file which depends on those buffers.
469 : : * So the locking for private_list is via the private_lock in the address_space
470 : : * which backs the buffers. Which is different from the address_space
471 : : * against which the buffers are listed. So for a particular address_space,
472 : : * mapping->private_lock does *not* protect mapping->private_list! In fact,
473 : : * mapping->private_list will always be protected by the backing blockdev's
474 : : * ->private_lock.
475 : : *
476 : : * Which introduces a requirement: all buffers on an address_space's
477 : : * ->private_list must be from the same address_space: the blockdev's.
478 : : *
479 : : * address_spaces which do not place buffers at ->private_list via these
480 : : * utility functions are free to use private_lock and private_list for
481 : : * whatever they want. The only requirement is that list_empty(private_list)
482 : : * be true at clear_inode() time.
483 : : *
484 : : * FIXME: clear_inode should not call invalidate_inode_buffers(). The
485 : : * filesystems should do that. invalidate_inode_buffers() should just go
486 : : * BUG_ON(!list_empty).
487 : : *
488 : : * FIXME: mark_buffer_dirty_inode() is a data-plane operation. It should
489 : : * take an address_space, not an inode. And it should be called
490 : : * mark_buffer_dirty_fsync() to clearly define why those buffers are being
491 : : * queued up.
492 : : *
493 : : * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
494 : : * list if it is already on a list. Because if the buffer is on a list,
495 : : * it *must* already be on the right one. If not, the filesystem is being
496 : : * silly. This will save a ton of locking. But first we have to ensure
497 : : * that buffers are taken *off* the old inode's list when they are freed
498 : : * (presumably in truncate). That requires careful auditing of all
499 : : * filesystems (do it inside bforget()). It could also be done by bringing
500 : : * b_inode back.
501 : : */
502 : :
503 : : /*
504 : : * The buffer's backing address_space's private_lock must be held
505 : : */
506 : 0 : static void __remove_assoc_queue(struct buffer_head *bh)
507 : : {
508 : 0 : list_del_init(&bh->b_assoc_buffers);
509 [ # # ]: 0 : WARN_ON(!bh->b_assoc_map);
510 [ # # ]: 0 : if (buffer_write_io_error(bh))
511 : 0 : set_bit(AS_EIO, &bh->b_assoc_map->flags);
512 : 0 : bh->b_assoc_map = NULL;
513 : 0 : }
514 : :
515 : 0 : int inode_has_buffers(struct inode *inode)
516 : : {
517 : 4478956 : return !list_empty(&inode->i_data.private_list);
518 : : }
519 : :
520 : : /*
521 : : * osync is designed to support O_SYNC io. It waits synchronously for
522 : : * all already-submitted IO to complete, but does not queue any new
523 : : * writes to the disk.
524 : : *
525 : : * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
526 : : * you dirty the buffers, and then use osync_inode_buffers to wait for
527 : : * completion. Any other dirty buffers which are not yet queued for
528 : : * write will not be flushed to disk by the osync.
529 : : */
530 : 0 : static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
531 : : {
532 : : struct buffer_head *bh;
533 : : struct list_head *p;
534 : : int err = 0;
535 : :
536 : : spin_lock(lock);
537 : : repeat:
538 [ # # ]: 0 : list_for_each_prev(p, list) {
539 : 0 : bh = BH_ENTRY(p);
540 [ # # ]: 0 : if (buffer_locked(bh)) {
541 : : get_bh(bh);
542 : : spin_unlock(lock);
543 : : wait_on_buffer(bh);
544 [ # # ]: 0 : if (!buffer_uptodate(bh))
545 : : err = -EIO;
546 : : brelse(bh);
547 : : spin_lock(lock);
548 : : goto repeat;
549 : : }
550 : : }
551 : : spin_unlock(lock);
552 : 0 : return err;
553 : : }
554 : :
555 : 0 : static void do_thaw_one(struct super_block *sb, void *unused)
556 : : {
557 : : char b[BDEVNAME_SIZE];
558 [ # # ][ # # ]: 0 : while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
559 : 0 : printk(KERN_WARNING "Emergency Thaw on %s\n",
560 : : bdevname(sb->s_bdev, b));
561 : 0 : }
562 : :
563 : 0 : static void do_thaw_all(struct work_struct *work)
564 : : {
565 : 0 : iterate_supers(do_thaw_one, NULL);
566 : 0 : kfree(work);
567 : 0 : printk(KERN_WARNING "Emergency Thaw complete\n");
568 : 0 : }
569 : :
570 : : /**
571 : : * emergency_thaw_all -- forcibly thaw every frozen filesystem
572 : : *
573 : : * Used for emergency unfreeze of all filesystems via SysRq
574 : : */
575 : 0 : void emergency_thaw_all(void)
576 : : {
577 : : struct work_struct *work;
578 : :
579 : : work = kmalloc(sizeof(*work), GFP_ATOMIC);
580 [ # # ]: 0 : if (work) {
581 : 0 : INIT_WORK(work, do_thaw_all);
582 : : schedule_work(work);
583 : : }
584 : 0 : }
585 : :
586 : : /**
587 : : * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
588 : : * @mapping: the mapping which wants those buffers written
589 : : *
590 : : * Starts I/O against the buffers at mapping->private_list, and waits upon
591 : : * that I/O.
592 : : *
593 : : * Basically, this is a convenience function for fsync().
594 : : * @mapping is a file or directory which needs those buffers to be written for
595 : : * a successful fsync().
596 : : */
597 : 0 : int sync_mapping_buffers(struct address_space *mapping)
598 : : {
599 : 0 : struct address_space *buffer_mapping = mapping->private_data;
600 : :
601 [ # # ][ # # ]: 0 : if (buffer_mapping == NULL || list_empty(&mapping->private_list))
602 : : return 0;
603 : :
604 : 0 : return fsync_buffers_list(&buffer_mapping->private_lock,
605 : : &mapping->private_list);
606 : : }
607 : : EXPORT_SYMBOL(sync_mapping_buffers);
608 : :
609 : : /*
610 : : * Called when we've recently written block `bblock', and it is known that
611 : : * `bblock' was for a buffer_boundary() buffer. This means that the block at
612 : : * `bblock + 1' is probably a dirty indirect block. Hunt it down and, if it's
613 : : * dirty, schedule it for IO. So that indirects merge nicely with their data.
614 : : */
615 : 0 : void write_boundary_block(struct block_device *bdev,
616 : : sector_t bblock, unsigned blocksize)
617 : : {
618 : 0 : struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
619 [ # # ]: 0 : if (bh) {
620 [ # # ]: 0 : if (buffer_dirty(bh))
621 : 0 : ll_rw_block(WRITE, 1, &bh);
622 : 0 : put_bh(bh);
623 : : }
624 : 0 : }
625 : :
626 : 0 : void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
627 : : {
628 : 0 : struct address_space *mapping = inode->i_mapping;
629 : 0 : struct address_space *buffer_mapping = bh->b_page->mapping;
630 : :
631 : 0 : mark_buffer_dirty(bh);
632 [ # # ]: 0 : if (!mapping->private_data) {
633 : 0 : mapping->private_data = buffer_mapping;
634 : : } else {
635 [ # # ]: 0 : BUG_ON(mapping->private_data != buffer_mapping);
636 : : }
637 [ # # ]: 0 : if (!bh->b_assoc_map) {
638 : : spin_lock(&buffer_mapping->private_lock);
639 : 0 : list_move_tail(&bh->b_assoc_buffers,
640 : : &mapping->private_list);
641 : 0 : bh->b_assoc_map = mapping;
642 : : spin_unlock(&buffer_mapping->private_lock);
643 : : }
644 : 0 : }
645 : : EXPORT_SYMBOL(mark_buffer_dirty_inode);
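/*
 * Illustrative sketch only (not part of this file): how a simple filesystem's
 * fsync path ties the private_list machinery above together.  myfs_fsync() is
 * a hypothetical name; generic_file_fsync() follows roughly the same shape.
 */
static int myfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_mapping->host;
	int err, ret;

	/* write and wait upon the data pages themselves */
	err = filemap_write_and_wait_range(inode->i_mapping, start, end);

	/* then write and wait upon the "associated" metadata buffers queued on
	 * inode->i_mapping->private_list by mark_buffer_dirty_inode(bh, inode) */
	ret = sync_mapping_buffers(inode->i_mapping);
	if (!err)
		err = ret;
	return err;
}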
646 : :
647 : : /*
648 : : * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
649 : : * dirty.
650 : : *
651 : : * If warn is true, then emit a warning if the page is not uptodate and has
652 : : * not been truncated.
653 : : */
654 : 0 : static void __set_page_dirty(struct page *page,
655 : : struct address_space *mapping, int warn)
656 : : {
657 : : spin_lock_irq(&mapping->tree_lock);
658 [ + + ]: 2139281 : if (page->mapping) { /* Race with truncate? */
659 [ - + - ]: 2139268 : WARN_ON_ONCE(warn && !PageUptodate(page));
[ - + ][ # # ]
[ # # ]
660 : 2139268 : account_page_dirtied(page, mapping);
661 : 2139278 : radix_tree_tag_set(&mapping->page_tree,
662 : : page_index(page), PAGECACHE_TAG_DIRTY);
663 : : }
664 : : spin_unlock_irq(&mapping->tree_lock);
665 : 2139302 : __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
666 : 2139245 : }
667 : :
668 : : /*
669 : : * Add a page to the dirty page list.
670 : : *
671 : : * It is a sad fact of life that this function is called from several places
672 : : * deeply under spinlocking. It may not sleep.
673 : : *
674 : : * If the page has buffers, the uptodate buffers are set dirty, to preserve
675 : : * dirty-state coherency between the page and the buffers. If the page does
676 : : * not have buffers then when they are later attached they will all be set
677 : : * dirty.
678 : : *
679 : : * The buffers are dirtied before the page is dirtied. There's a small race
680 : : * window in which a writepage caller may see the page cleanness but not the
681 : : * buffer dirtiness. That's fine. If this code were to set the page dirty
682 : : * before the buffers, a concurrent writepage caller could clear the page dirty
683 : : * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
684 : : * page on the dirty page list.
685 : : *
686 : : * We use private_lock to lock against try_to_free_buffers while using the
687 : : * page's buffer list. Also use this to protect against clean buffers being
688 : : * added to the page after it was set dirty.
689 : : *
690 : : * FIXME: may need to call ->reservepage here as well. That's rather up to the
691 : : * address_space though.
692 : : */
693 : 0 : int __set_page_dirty_buffers(struct page *page)
694 : : {
695 : : int newly_dirty;
696 : 792976 : struct address_space *mapping = page_mapping(page);
697 : :
698 [ - + ]: 793047 : if (unlikely(!mapping))
699 : 0 : return !TestSetPageDirty(page);
700 : :
701 : : spin_lock(&mapping->private_lock);
702 [ + - ]: 793082 : if (page_has_buffers(page)) {
703 [ - + ]: 793082 : struct buffer_head *head = page_buffers(page);
704 : : struct buffer_head *bh = head;
705 : :
706 : : do {
707 : : set_buffer_dirty(bh);
708 : 793082 : bh = bh->b_this_page;
709 [ - + ]: 793082 : } while (bh != head);
710 : : }
711 : 793082 : newly_dirty = !TestSetPageDirty(page);
712 : : spin_unlock(&mapping->private_lock);
713 : :
714 [ - + ]: 793082 : if (newly_dirty)
715 : 0 : __set_page_dirty(page, mapping, 1);
716 : 793082 : return newly_dirty;
717 : : }
718 : : EXPORT_SYMBOL(__set_page_dirty_buffers);
719 : :
720 : : /*
721 : : * Write out and wait upon a list of buffers.
722 : : *
723 : : * We have conflicting pressures: we want to make sure that all
724 : : * initially dirty buffers get waited on, but that any subsequently
725 : : * dirtied buffers don't. After all, we don't want fsync to last
726 : : * forever if somebody is actively writing to the file.
727 : : *
728 : : * Do this in two main stages: first we copy dirty buffers to a
729 : : * temporary inode list, queueing the writes as we go. Then we clean
730 : : * up, waiting for those writes to complete.
731 : : *
732 : : * During this second stage, any subsequent updates to the file may end
733 : : * up refiling the buffer on the original inode's dirty list again, so
734 : : * there is a chance we will end up with a buffer queued for write but
735 : : * not yet completed on that list. So, as a final cleanup we go through
736 : : * the osync code to catch these locked, dirty buffers without requeuing
737 : : * any newly dirty buffers for write.
738 : : */
739 : 0 : static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
740 : : {
741 : : struct buffer_head *bh;
742 : : struct list_head tmp;
743 : : struct address_space *mapping;
744 : : int err = 0, err2;
745 : : struct blk_plug plug;
746 : :
747 : : INIT_LIST_HEAD(&tmp);
748 : 0 : blk_start_plug(&plug);
749 : :
750 : : spin_lock(lock);
751 [ # # ]: 0 : while (!list_empty(list)) {
752 : 0 : bh = BH_ENTRY(list->next);
753 : 0 : mapping = bh->b_assoc_map;
754 : 0 : __remove_assoc_queue(bh);
755 : : /* Avoid race with mark_buffer_dirty_inode() which does
756 : : * a lockless check and we rely on seeing the dirty bit */
757 : 0 : smp_mb();
758 [ # # ][ # # ]: 0 : if (buffer_dirty(bh) || buffer_locked(bh)) {
759 : 0 : list_add(&bh->b_assoc_buffers, &tmp);
760 : 0 : bh->b_assoc_map = mapping;
761 [ # # ]: 0 : if (buffer_dirty(bh)) {
762 : : get_bh(bh);
763 : : spin_unlock(lock);
764 : : /*
765 : : * Ensure any pending I/O completes so that
766 : : * write_dirty_buffer() actually writes the
767 : : * current contents - it is a noop if I/O is
768 : : * still in flight on potentially older
769 : : * contents.
770 : : */
771 : 0 : write_dirty_buffer(bh, WRITE_SYNC);
772 : :
773 : : /*
774 : : * Kick off IO for the previous mapping. Note
775 : : * that we will not run the very last mapping,
776 : : * wait_on_buffer() will do that for us
777 : : * through sync_buffer().
778 : : */
779 : : brelse(bh);
780 : : spin_lock(lock);
781 : : }
782 : : }
783 : : }
784 : :
785 : : spin_unlock(lock);
786 : 0 : blk_finish_plug(&plug);
787 : : spin_lock(lock);
788 : :
789 [ # # ]: 0 : while (!list_empty(&tmp)) {
790 : 0 : bh = BH_ENTRY(tmp.prev);
791 : : get_bh(bh);
792 : 0 : mapping = bh->b_assoc_map;
793 : 0 : __remove_assoc_queue(bh);
794 : : /* Avoid race with mark_buffer_dirty_inode() which does
795 : : * a lockless check and we rely on seeing the dirty bit */
796 : 0 : smp_mb();
797 [ # # ]: 0 : if (buffer_dirty(bh)) {
798 : 0 : list_add(&bh->b_assoc_buffers,
799 : : &mapping->private_list);
800 : 0 : bh->b_assoc_map = mapping;
801 : : }
802 : : spin_unlock(lock);
803 : : wait_on_buffer(bh);
804 [ # # ]: 0 : if (!buffer_uptodate(bh))
805 : : err = -EIO;
806 : : brelse(bh);
807 : : spin_lock(lock);
808 : : }
809 : :
810 : : spin_unlock(lock);
811 : 0 : err2 = osync_buffers_list(lock, list);
812 [ # # ]: 0 : if (err)
813 : : return err;
814 : : else
815 : 0 : return err2;
816 : : }
817 : :
818 : : /*
819 : : * Invalidate any and all dirty buffers on a given inode. We are
820 : : * probably unmounting the fs, but that doesn't mean we have already
821 : : * done a sync(). Just drop the buffers from the inode list.
822 : : *
823 : : * NOTE: we take the inode's blockdev's mapping's private_lock. Which
824 : : * assumes that all the buffers are against the blockdev. Not true
825 : : * for reiserfs.
826 : : */
827 : 0 : void invalidate_inode_buffers(struct inode *inode)
828 : : {
829 [ - + ]: 476186 : if (inode_has_buffers(inode)) {
830 : : struct address_space *mapping = &inode->i_data;
831 : : struct list_head *list = &mapping->private_list;
832 : 0 : struct address_space *buffer_mapping = mapping->private_data;
833 : :
834 : : spin_lock(&buffer_mapping->private_lock);
835 [ # # ]: 0 : while (!list_empty(list))
836 : 0 : __remove_assoc_queue(BH_ENTRY(list->next));
837 : : spin_unlock(&buffer_mapping->private_lock);
838 : : }
839 : 0 : }
840 : : EXPORT_SYMBOL(invalidate_inode_buffers);
841 : :
842 : : /*
843 : : * Remove any clean buffers from the inode's buffer list. This is called
844 : : * when we're trying to free the inode itself. Those buffers can pin it.
845 : : *
846 : : * Returns true if all buffers were removed.
847 : : */
848 : 0 : int remove_inode_buffers(struct inode *inode)
849 : : {
850 : : int ret = 1;
851 : :
852 [ - + ]: 36 : if (inode_has_buffers(inode)) {
853 : : struct address_space *mapping = &inode->i_data;
854 : : struct list_head *list = &mapping->private_list;
855 : 0 : struct address_space *buffer_mapping = mapping->private_data;
856 : :
857 : : spin_lock(&buffer_mapping->private_lock);
858 [ # # ]: 0 : while (!list_empty(list)) {
859 : 0 : struct buffer_head *bh = BH_ENTRY(list->next);
860 [ # # ]: 0 : if (buffer_dirty(bh)) {
861 : : ret = 0;
862 : : break;
863 : : }
864 : 0 : __remove_assoc_queue(bh);
865 : : }
866 : : spin_unlock(&buffer_mapping->private_lock);
867 : : }
868 : 36 : return ret;
869 : : }
870 : :
871 : : /*
872 : : * Create the appropriate buffers when given a page for the data area and
873 : : * the size of each buffer.. Use the bh->b_this_page linked list to
874 : : * follow the buffers created. Return NULL if unable to create more
875 : : * buffers.
876 : : *
877 : : * The retry flag is used to differentiate async IO (paging, swapping)
878 : : * which may not fail from ordinary buffer allocations.
879 : : */
880 : 1898705 : struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
881 : : int retry)
882 : : {
883 : : struct buffer_head *bh, *head;
884 : : long offset;
885 : :
886 : : try_again:
887 : : head = NULL;
888 : : offset = PAGE_SIZE;
889 [ + + ]: 3797504 : while ((offset -= size) >= 0) {
890 : 1898800 : bh = alloc_buffer_head(GFP_NOFS);
891 [ + - ]: 1898704 : if (!bh)
892 : : goto no_grow;
893 : :
894 : 1898704 : bh->b_this_page = head;
895 : 1898704 : bh->b_blocknr = -1;
896 : : head = bh;
897 : :
898 : 1898704 : bh->b_size = size;
899 : :
900 : : /* Link the buffer to its page */
901 : 1898704 : set_bh_page(bh, page, offset);
902 : : }
903 : : return head;
904 : : /*
905 : : * In case anything failed, we just free everything we got.
906 : : */
907 : : no_grow:
908 [ # # ]: 0 : if (head) {
909 : : do {
910 : : bh = head;
911 : 0 : head = head->b_this_page;
912 : 0 : free_buffer_head(bh);
913 [ # # ]: 0 : } while (head);
914 : : }
915 : :
916 : : /*
917 : : * Return failure for non-async IO requests. Async IO requests
918 : : * are not allowed to fail, so we have to wait until buffer heads
919 : : * become available. But we don't want tasks sleeping with
920 : : * partially complete buffers, so all were released above.
921 : : */
922 [ # # ]: 0 : if (!retry)
923 : : return NULL;
924 : :
925 : : /* We're _really_ low on memory. Now we just
926 : : * wait for old buffer heads to become free due to
927 : : * finishing IO. Since this is an async request and
928 : : * the reserve list is empty, we're sure there are
929 : : * async buffer heads in use.
930 : : */
931 : 0 : free_more_memory();
932 : 0 : goto try_again;
933 : : }
934 : : EXPORT_SYMBOL_GPL(alloc_page_buffers);
935 : :
936 : : static inline void
937 : : link_dev_buffers(struct page *page, struct buffer_head *head)
938 : : {
939 : : struct buffer_head *bh, *tail;
940 : :
941 : : bh = head;
942 : : do {
943 : : tail = bh;
944 : 59594 : bh = bh->b_this_page;
945 [ - + ]: 59594 : } while (bh);
946 : 59594 : tail->b_this_page = head;
947 : : attach_page_buffers(page, head);
948 : : }
949 : :
950 : 0 : static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
951 : : {
952 : : sector_t retval = ~((sector_t)0);
953 : 59596 : loff_t sz = i_size_read(bdev->bd_inode);
954 : :
955 [ + ]: 59596 : if (sz) {
956 : : unsigned int sizebits = blksize_bits(size);
957 : 59596 : retval = (sz >> sizebits);
958 : : }
959 : 0 : return retval;
960 : : }
961 : :
962 : : /*
963 : : * Initialise the state of a blockdev page's buffers.
964 : : */
965 : : static sector_t
966 : 0 : init_page_buffers(struct page *page, struct block_device *bdev,
967 : : sector_t block, int size)
968 : : {
969 [ - + ]: 59596 : struct buffer_head *head = page_buffers(page);
970 : : struct buffer_head *bh = head;
971 : : int uptodate = PageUptodate(page);
972 : 59596 : sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
973 : :
974 : : do {
975 [ + + ]: 119192 : if (!buffer_mapped(bh)) {
976 : : init_buffer(bh, NULL, NULL);
977 : 59594 : bh->b_bdev = bdev;
978 : 59594 : bh->b_blocknr = block;
979 [ - + ]: 59594 : if (uptodate)
980 : : set_buffer_uptodate(bh);
981 [ + - ]: 59594 : if (block < end_block)
982 : : set_buffer_mapped(bh);
983 : : }
984 : 59596 : block++;
985 : 59596 : bh = bh->b_this_page;
986 [ - + ]: 59596 : } while (bh != head);
987 : :
988 : : /*
989 : : * Caller needs to validate requested block against end of device.
990 : : */
991 : 59596 : return end_block;
992 : : }
993 : :
994 : : /*
995 : : * Create the page-cache page that contains the requested block.
996 : : *
997 : : * This is used purely for blockdev mappings.
998 : : */
999 : : static int
1000 : 0 : grow_dev_page(struct block_device *bdev, sector_t block,
1001 : : pgoff_t index, int size, int sizebits)
1002 : : {
1003 : 59596 : struct inode *inode = bdev->bd_inode;
1004 : : struct page *page;
1005 : : struct buffer_head *bh;
1006 : : sector_t end_block;
1007 : : int ret = 0; /* Will call free_more_memory() */
1008 : : gfp_t gfp_mask;
1009 : :
1010 : 119192 : gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
1011 : : gfp_mask |= __GFP_MOVABLE;
1012 : : /*
1013 : : * XXX: __getblk_slow() can not really deal with failure and
1014 : : * will endlessly loop on improvised global reclaim. Prefer
1015 : : * looping in the allocator rather than here, at least that
1016 : : * code knows what it's doing.
1017 : : */
1018 : 59596 : gfp_mask |= __GFP_NOFAIL;
1019 : :
1020 : 59596 : page = find_or_create_page(inode->i_mapping, index, gfp_mask);
1021 [ + - ]: 59596 : if (!page)
1022 : : return ret;
1023 : :
1024 [ - + ]: 59596 : BUG_ON(!PageLocked(page));
1025 : :
1026 [ + + ]: 59596 : if (page_has_buffers(page)) {
1027 [ - + ]: 2 : bh = page_buffers(page);
1028 [ + - ]: 2 : if (bh->b_size == size) {
1029 : 2 : end_block = init_page_buffers(page, bdev,
1030 : 2 : index << sizebits, size);
1031 : 2 : goto done;
1032 : : }
1033 [ # # ]: 0 : if (!try_to_free_buffers(page))
1034 : : goto failed;
1035 : : }
1036 : :
1037 : : /*
1038 : : * Allocate some buffers for this page
1039 : : */
1040 : 59594 : bh = alloc_page_buffers(page, size, 0);
1041 [ + ]: 59593 : if (!bh)
1042 : : goto failed;
1043 : :
1044 : : /*
1045 : : * Link the page to the buffers and initialise them. Take the
1046 : : * lock to be atomic wrt __find_get_block(), which does not
1047 : : * run under the page lock.
1048 : : */
1049 : 59594 : spin_lock(&inode->i_mapping->private_lock);
1050 : : link_dev_buffers(page, bh);
1051 : 59594 : end_block = init_page_buffers(page, bdev, index << sizebits, size);
1052 : 59594 : spin_unlock(&inode->i_mapping->private_lock);
1053 : : done:
1054 [ - + ]: 59596 : ret = (block < end_block) ? 1 : -ENXIO;
1055 : : failed:
1056 : 59595 : unlock_page(page);
1057 : 59596 : page_cache_release(page);
1058 : 59596 : return ret;
1059 : : }
1060 : :
1061 : : /*
1062 : : * Create buffers for the specified block device block's page. If
1063 : : * that page was dirty, the buffers are set dirty also.
1064 : : */
1065 : : static int
1066 : 59596 : grow_buffers(struct block_device *bdev, sector_t block, int size)
1067 : : {
1068 : : pgoff_t index;
1069 : : int sizebits;
1070 : :
1071 : : sizebits = -1;
1072 : : do {
1073 : 59596 : sizebits++;
1074 [ - + ]: 59596 : } while ((size << sizebits) < PAGE_SIZE);
1075 : :
1076 : 59596 : index = block >> sizebits;
1077 : :
1078 : : /*
1079 : : * Check for a block which wants to lie outside our maximum possible
1080 : : * pagecache index. (this comparison is done using sector_t types).
1081 : : */
1082 [ - + ]: 59596 : if (unlikely(index != block >> sizebits)) {
1083 : : char b[BDEVNAME_SIZE];
1084 : :
1085 : 0 : printk(KERN_ERR "%s: requested out-of-range block %llu for "
1086 : : "device %s\n",
1087 : : __func__, (unsigned long long)block,
1088 : : bdevname(bdev, b));
1089 : : return -EIO;
1090 : : }
1091 : :
1092 : : /* Create a page with the proper size buffers.. */
1093 : 59596 : return grow_dev_page(bdev, block, index, size, sizebits);
1094 : : }
1095 : :
1096 : : static struct buffer_head *
1097 : 0 : __getblk_slow(struct block_device *bdev, sector_t block, int size)
1098 : : {
1099 : : /* Size must be multiple of hard sectorsize */
1100 [ + - ][ + - ]: 59600 : if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1101 : : (size < 512 || size > PAGE_SIZE))) {
1102 : 0 : printk(KERN_ERR "getblk(): invalid block size %d requested\n",
1103 : : size);
1104 : 0 : printk(KERN_ERR "logical block size: %d\n",
1105 : : bdev_logical_block_size(bdev));
1106 : :
1107 : 59600 : dump_stack();
1108 : 0 : return NULL;
1109 : : }
1110 : :
1111 : : for (;;) {
1112 : : struct buffer_head *bh;
1113 : : int ret;
1114 : :
1115 : 119196 : bh = __find_get_block(bdev, block, size);
1116 [ + + ]: 119193 : if (bh)
1117 : : return bh;
1118 : :
1119 : 59596 : ret = grow_buffers(bdev, block, size);
1120 [ + - ]: 59596 : if (ret < 0)
1121 : : return NULL;
1122 [ + - ]: 59596 : if (ret == 0)
1123 : 0 : free_more_memory();
1124 : : }
1125 : : }
1126 : :
1127 : : /*
1128 : : * The relationship between dirty buffers and dirty pages:
1129 : : *
1130 : : * Whenever a page has any dirty buffers, the page's dirty bit is set, and
1131 : : * the page is tagged dirty in its radix tree.
1132 : : *
1133 : : * At all times, the dirtiness of the buffers represents the dirtiness of
1134 : : * subsections of the page. If the page has buffers, the page dirty bit is
1135 : : * merely a hint about the true dirty state.
1136 : : *
1137 : : * When a page is set dirty in its entirety, all its buffers are marked dirty
1138 : : * (if the page has buffers).
1139 : : *
1140 : : * When a buffer is marked dirty, its page is dirtied, but the page's other
1141 : : * buffers are not.
1142 : : *
1143 : : * Also. When blockdev buffers are explicitly read with bread(), they
1144 : : * individually become uptodate. But their backing page remains not
1145 : : * uptodate - even if all of its buffers are uptodate. A subsequent
1146 : : * block_read_full_page() against that page will discover all the uptodate
1147 : : * buffers, will set the page uptodate and will perform no I/O.
1148 : : */
1149 : :
1150 : : /**
1151 : : * mark_buffer_dirty - mark a buffer_head as needing writeout
1152 : : * @bh: the buffer_head to mark dirty
1153 : : *
1154 : : * mark_buffer_dirty() will set the dirty bit against the buffer, then set its
1155 : : * backing page dirty, then tag the page as dirty in its address_space's radix
1156 : : * tree and then attach the address_space's inode to its superblock's dirty
1157 : : * inode list.
1158 : : *
1159 : : * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
1160 : : * mapping->tree_lock and mapping->host->i_lock.
1161 : : */
1162 : 0 : void mark_buffer_dirty(struct buffer_head *bh)
1163 : : {
1164 [ - + ][ # # ]: 7417513 : WARN_ON_ONCE(!buffer_uptodate(bh));
[ - ]
1165 : :
1166 : : trace_block_dirty_buffer(bh);
1167 : :
1168 : : /*
1169 : : * Very *carefully* optimize the it-is-already-dirty case.
1170 : : *
1171 : : * Don't let the final "is it dirty" escape to before we
1172 : : * perhaps modified the buffer.
1173 : : */
1174 [ + + ]: 7417484 : if (buffer_dirty(bh)) {
1175 : 5243819 : smp_mb();
1176 [ - + ]: 5243925 : if (buffer_dirty(bh))
1177 : 7417631 : return;
1178 : : }
1179 : :
1180 [ + ]: 2173631 : if (!test_set_buffer_dirty(bh)) {
1181 : 2173666 : struct page *page = bh->b_page;
1182 [ + + ]: 2173725 : if (!TestSetPageDirty(page)) {
1183 : 2139194 : struct address_space *mapping = page_mapping(page);
1184 [ + ]: 2139182 : if (mapping)
1185 : 2139237 : __set_page_dirty(page, mapping, 0);
1186 : : }
1187 : : }
1188 : : }
1189 : : EXPORT_SYMBOL(mark_buffer_dirty);
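/*
 * Illustrative sketch only (not part of this file): the classic caller
 * pattern for mark_buffer_dirty() - read or look up a metadata block, modify
 * it under the buffer lock, then flag it for writeout.  example_update_block()
 * and its arguments are hypothetical; len must not exceed bh->b_size.
 */
static int example_update_block(struct super_block *sb, sector_t blocknr,
				const void *data, size_t len)
{
	struct buffer_head *bh;

	bh = sb_bread(sb, blocknr);	/* returns an uptodate buffer, or NULL */
	if (!bh)
		return -EIO;

	lock_buffer(bh);
	memcpy(bh->b_data, data, len);	/* modify the buffer contents ... */
	mark_buffer_dirty(bh);		/* ... then mark it for writeout */
	unlock_buffer(bh);

	brelse(bh);			/* drop the reference taken by sb_bread() */
	return 0;
}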
1190 : :
1191 : : /*
1192 : : * Decrement a buffer_head's reference count. If all buffers against a page
1193 : : * have zero reference count, are clean and unlocked, and if the page is clean
1194 : : * and unlocked then try_to_free_buffers() may strip the buffers from the page
1195 : : * in preparation for freeing it (sometimes, rarely, buffers are removed from
1196 : : * a page but it ends up not being freed, and buffers may later be reattached).
1197 : : */
1198 : 0 : void __brelse(struct buffer_head * buf)
1199 : : {
1200 [ + - ]: 14908091 : if (atomic_read(&buf->b_count)) {
1201 : : put_bh(buf);
1202 : 14909217 : return;
1203 : : }
1204 : 0 : WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1205 : : }
1206 : : EXPORT_SYMBOL(__brelse);
1207 : :
1208 : : /*
1209 : : * bforget() is like brelse(), except it discards any
1210 : : * potentially dirty data.
1211 : : */
1212 : 0 : void __bforget(struct buffer_head *bh)
1213 : : {
1214 : : clear_buffer_dirty(bh);
1215 [ - + ]: 38767 : if (bh->b_assoc_map) {
1216 : 0 : struct address_space *buffer_mapping = bh->b_page->mapping;
1217 : :
1218 : : spin_lock(&buffer_mapping->private_lock);
1219 : 0 : list_del_init(&bh->b_assoc_buffers);
1220 : 0 : bh->b_assoc_map = NULL;
1221 : : spin_unlock(&buffer_mapping->private_lock);
1222 : : }
1223 : 38767 : __brelse(bh);
1224 : 38767 : }
1225 : : EXPORT_SYMBOL(__bforget);
1226 : :
1227 : 0 : static struct buffer_head *__bread_slow(struct buffer_head *bh)
1228 : : {
1229 : : lock_buffer(bh);
1230 [ # # ]: 0 : if (buffer_uptodate(bh)) {
1231 : 0 : unlock_buffer(bh);
1232 : 0 : return bh;
1233 : : } else {
1234 : : get_bh(bh);
1235 : 0 : bh->b_end_io = end_buffer_read_sync;
1236 : : submit_bh(READ, bh);
1237 : : wait_on_buffer(bh);
1238 [ # # ]: 0 : if (buffer_uptodate(bh))
1239 : : return bh;
1240 : : }
1241 : : brelse(bh);
1242 : : return NULL;
1243 : : }
1244 : :
1245 : : /*
1246 : : * Per-cpu buffer LRU implementation. To reduce the cost of __find_get_block().
1247 : : * The bhs[] array is sorted - newest buffer is at bhs[0]. Buffers have their
1248 : : * refcount elevated by one when they're in an LRU. A buffer can only appear
1249 : : * once in a particular CPU's LRU. A single buffer can be present in multiple
1250 : : * CPU's LRUs at the same time.
1251 : : *
1252 : : * This is a transparent caching front-end to sb_bread(), sb_getblk() and
1253 : : * sb_find_get_block().
1254 : : *
1255 : : * The LRUs themselves only need locking against invalidate_bh_lrus. We use
1256 : : * a local interrupt disable for that.
1257 : : */
1258 : :
1259 : : #define BH_LRU_SIZE 8
1260 : :
1261 : : struct bh_lru {
1262 : : struct buffer_head *bhs[BH_LRU_SIZE];
1263 : : };
1264 : :
1265 : : static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
1266 : :
1267 : : #ifdef CONFIG_SMP
1268 : : #define bh_lru_lock() local_irq_disable()
1269 : : #define bh_lru_unlock() local_irq_enable()
1270 : : #else
1271 : : #define bh_lru_lock() preempt_disable()
1272 : : #define bh_lru_unlock() preempt_enable()
1273 : : #endif
1274 : :
1275 : : static inline void check_irqs_on(void)
1276 : : {
1277 : : #ifdef irqs_disabled
1278 [ - + ][ - + ]: 14791397 : BUG_ON(irqs_disabled());
1279 : : #endif
1280 : : }
1281 : :
1282 : : /*
1283 : : * The LRU management algorithm is dopey-but-simple. Sorry.
1284 : : */
1285 : 0 : static void bh_lru_install(struct buffer_head *bh)
1286 : : {
1287 : : struct buffer_head *evictee = NULL;
1288 : :
1289 : : check_irqs_on();
1290 : : bh_lru_lock();
1291 [ + - ]: 304124 : if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
1292 : : struct buffer_head *bhs[BH_LRU_SIZE];
1293 : : int in;
1294 : : int out = 0;
1295 : :
1296 : : get_bh(bh);
1297 : 608228 : bhs[out++] = bh;
1298 [ + + ]: 3041139 : for (in = 0; in < BH_LRU_SIZE; in++) {
1299 : : struct buffer_head *bh2 =
1300 : 4865784 : __this_cpu_read(bh_lrus.bhs[in]);
1301 : :
1302 [ + ]: 2432892 : if (bh2 == bh) {
1303 : 0 : __brelse(bh2);
1304 : : } else {
1305 [ + + ]: 2432929 : if (out >= BH_LRU_SIZE) {
1306 [ - + ]: 304120 : BUG_ON(evictee != NULL);
1307 : : evictee = bh2;
1308 : : } else {
1309 : 2128809 : bhs[out++] = bh2;
1310 : : }
1311 : : }
1312 : : }
1313 [ - + ]: 304123 : while (out < BH_LRU_SIZE)
1314 : 0 : bhs[out++] = NULL;
1315 : 304123 : memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1316 : : }
1317 : : bh_lru_unlock();
1318 : :
1319 [ + + ]: 304122 : if (evictee)
1320 : 304121 : __brelse(evictee);
1321 : 304121 : }
1322 : :
1323 : : /*
1324 : : * Look up the bh in this cpu's LRU. If it's there, move it to the head.
1325 : : */
1326 : : static struct buffer_head *
1327 : 0 : lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1328 : : {
1329 : : struct buffer_head *ret = NULL;
1330 : : unsigned int i;
1331 : :
1332 : : check_irqs_on();
1333 : : bh_lru_lock();
1334 [ + + ]: 28177880 : for (i = 0; i < BH_LRU_SIZE; i++) {
1335 : 55476052 : struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1336 : :
1337 [ + ][ + ]: 27738026 : if (bh && bh->b_bdev == bdev &&
[ + + ]
1338 [ + + ]: 14050890 : bh->b_blocknr == block && bh->b_size == size) {
1339 [ + + ]: 14046824 : if (i) {
1340 [ + + ]: 15268232 : while (i) {
1341 : 10199095 : __this_cpu_write(bh_lrus.bhs[i],
1342 : : __this_cpu_read(bh_lrus.bhs[i - 1]));
1343 : : i--;
1344 : : }
1345 : 5069137 : __this_cpu_write(bh_lrus.bhs[0], bh);
1346 : : }
1347 : : get_bh(bh);
1348 : : ret = bh;
1349 : 14050886 : break;
1350 : : }
1351 : : }
1352 : : bh_lru_unlock();
1353 : 14487202 : return ret;
1354 : : }
1355 : :
1356 : : /*
1357 : : * Perform a pagecache lookup for the matching buffer. If it's there, refresh
1358 : : * it in the LRU and mark it as accessed. If it is not present then return
1359 : : * NULL
1360 : : */
1361 : : struct buffer_head *
1362 : 0 : __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1363 : : {
1364 : 14486755 : struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
1365 : :
1366 [ + + ]: 14487457 : if (bh == NULL) {
1367 : 436667 : bh = __find_get_block_slow(bdev, block);
1368 [ + + ]: 436672 : if (bh)
1369 : 304124 : bh_lru_install(bh);
1370 : : }
1371 [ + + ]: 14487546 : if (bh)
1372 : : touch_buffer(bh);
1373 : 14487315 : return bh;
1374 : : }
1375 : : EXPORT_SYMBOL(__find_get_block);
1376 : :
1377 : : /*
1378 : : * __getblk will locate (and, if necessary, create) the buffer_head
1379 : : * which corresponds to the passed block_device, block and size. The
1380 : : * returned buffer has its reference count incremented.
1381 : : *
1382 : : * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
1383 : : * attempt is failing. FIXME, perhaps?
1384 : : */
1385 : : struct buffer_head *
1386 : 0 : __getblk(struct block_device *bdev, sector_t block, unsigned size)
1387 : : {
1388 : 14243303 : struct buffer_head *bh = __find_get_block(bdev, block, size);
1389 : :
1390 : : might_sleep();
1391 [ + + ]: 14243949 : if (bh == NULL)
1392 : 59600 : bh = __getblk_slow(bdev, block, size);
1393 : 646 : return bh;
1394 : : }
1395 : : EXPORT_SYMBOL(__getblk);
1396 : :
1397 : : /*
1398 : : * Do async read-ahead on a buffer..
1399 : : */
1400 : 0 : void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1401 : : {
1402 : 5128 : struct buffer_head *bh = __getblk(bdev, block, size);
1403 [ + - ]: 5129 : if (likely(bh)) {
1404 : 5129 : ll_rw_block(READA, 1, &bh);
1405 : 5128 : brelse(bh);
1406 : : }
1407 : 1 : }
1408 : : EXPORT_SYMBOL(__breadahead);
1409 : :
1410 : : /**
1411 : : * __bread() - reads a specified block and returns the bh
1412 : : * @bdev: the block_device to read from
1413 : : * @block: number of block
1414 : : * @size: size (in bytes) to read
1415 : : *
1416 : : * Reads a specified block, and returns buffer head that contains it.
1417 : : * It returns NULL if the block was unreadable.
1418 : : */
1419 : : struct buffer_head *
1420 : 0 : __bread(struct block_device *bdev, sector_t block, unsigned size)
1421 : : {
1422 : 0 : struct buffer_head *bh = __getblk(bdev, block, size);
1423 : :
1424 [ # # ][ # # ]: 0 : if (likely(bh) && !buffer_uptodate(bh))
1425 : 0 : bh = __bread_slow(bh);
1426 : 0 : return bh;
1427 : : }
1428 : : EXPORT_SYMBOL(__bread);
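/*
 * Illustrative sketch only (not part of this file; the helper name is
 * hypothetical): how the readahead and read primitives are typically
 * combined - hint a run of metadata blocks into the cache with
 * sb_breadahead(), then do the blocking read of the block actually needed.
 */
static struct buffer_head *example_read_with_readahead(struct super_block *sb,
					sector_t first, int count, sector_t wanted)
{
	int i;

	for (i = 0; i < count; i++)
		sb_breadahead(sb, first + i);	/* async; does not wait for the read */

	return sb_bread(sb, wanted);		/* waits; NULL if unreadable */
}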
1429 : :
1430 : : /*
1431 : : * invalidate_bh_lrus() is called rarely - but not only at unmount.
1432 : : * This doesn't race because it runs in each cpu either in irq
1433 : : * or with preempt disabled.
1434 : : */
1435 : 0 : static void invalidate_bh_lru(void *arg)
1436 : : {
1437 : 0 : struct bh_lru *b = &get_cpu_var(bh_lrus);
1438 : : int i;
1439 : :
1440 [ # # ]: 0 : for (i = 0; i < BH_LRU_SIZE; i++) {
1441 : 0 : brelse(b->bhs[i]);
1442 : 0 : b->bhs[i] = NULL;
1443 : : }
1444 : 0 : put_cpu_var(bh_lrus);
1445 : 0 : }
1446 : :
1447 : 0 : static bool has_bh_in_lru(int cpu, void *dummy)
1448 : : {
1449 : 0 : struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1450 : : int i;
1451 : :
1452 [ # # ]: 0 : for (i = 0; i < BH_LRU_SIZE; i++) {
1453 [ # # ]: 0 : if (b->bhs[i])
1454 : : return 1;
1455 : : }
1456 : :
1457 : : return 0;
1458 : : }
1459 : :
1460 : 0 : void invalidate_bh_lrus(void)
1461 : : {
1462 : 0 : on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1463 : 0 : }
1464 : : EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1465 : :
1466 : 0 : void set_bh_page(struct buffer_head *bh,
1467 : : struct page *page, unsigned long offset)
1468 : : {
1469 : 1969148 : bh->b_page = page;
1470 [ - + ]: 1969148 : BUG_ON(offset >= PAGE_SIZE);
1471 [ + + ]: 1969148 : if (PageHighMem(page))
1472 : : /*
1473 : : * This catches illegal uses and preserves the offset:
1474 : : */
1475 : 1262652 : bh->b_data = (char *)(0 + offset);
1476 : : else
1477 : 706496 : bh->b_data = page_address(page) + offset;
1478 : 1969164 : }
1479 : : EXPORT_SYMBOL(set_bh_page);
1480 : :
1481 : : /*
1482 : : * Called when truncating a buffer on a page completely.
1483 : : */
1484 : 0 : static void discard_buffer(struct buffer_head * bh)
1485 : : {
1486 : : lock_buffer(bh);
1487 : : clear_buffer_dirty(bh);
1488 : 1698953 : bh->b_bdev = NULL;
1489 : : clear_buffer_mapped(bh);
1490 : : clear_buffer_req(bh);
1491 : : clear_buffer_new(bh);
1492 : : clear_buffer_delay(bh);
1493 : : clear_buffer_unwritten(bh);
1494 : 1698952 : unlock_buffer(bh);
1495 : 1698946 : }
1496 : :
1497 : : /**
1498 : : * block_invalidatepage - invalidate part or all of a buffer-backed page
1499 : : *
1500 : : * @page: the page which is affected
1501 : : * @offset: start of the range to invalidate
1502 : : * @length: length of the range to invalidate
1503 : : *
1504 : : * block_invalidatepage() is called when all or part of the page has become
1505 : : * invalidated by a truncate operation.
1506 : : *
1507 : : * block_invalidatepage() does not have to release all buffers, but it must
1508 : : * ensure that no dirty buffer is left outside @offset and that no I/O
1509 : : * is underway against any of the blocks which are outside the truncation
1510 : : * point. Because the caller is about to free (and possibly reuse) those
1511 : : * blocks on-disk.
1512 : : */
1513 : 0 : void block_invalidatepage(struct page *page, unsigned int offset,
1514 : : unsigned int length)
1515 : : {
1516 : : struct buffer_head *head, *bh, *next;
1517 : : unsigned int curr_off = 0;
1518 : 1713931 : unsigned int stop = length + offset;
1519 : :
1520 [ - + ]: 1713931 : BUG_ON(!PageLocked(page));
1521 [ + ]: 1713931 : if (!page_has_buffers(page))
1522 : : goto out;
1523 : :
1524 : : /*
1525 : : * Check for overflow
1526 : : */
1527 [ - + ]: 3427875 : BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
1528 : :
1529 [ - + ]: 1713944 : head = page_buffers(page);
1530 : : bh = head;
1531 : : do {
1532 : 1713944 : unsigned int next_off = curr_off + bh->b_size;
1533 : 1713944 : next = bh->b_this_page;
1534 : :
1535 : : /*
1536 : : * Are we still fully in range ?
1537 : : */
1538 [ + ]: 1713944 : if (next_off > stop)
1539 : : goto out;
1540 : :
1541 : : /*
1542 : : * is this block fully invalidated?
1543 : : */
1544 [ + + ]: 1713946 : if (offset <= curr_off)
1545 : 1698946 : discard_buffer(bh);
1546 : : curr_off = next_off;
1547 : : bh = next;
1548 [ - + ]: 1713947 : } while (bh != head);
1549 : :
1550 : : /*
1551 : : * We release buffers only if the entire page is being invalidated.
1552 : : * The get_block cached value has been unconditionally invalidated,
1553 : : * so real IO is not possible anymore.
1554 : : */
1555 [ + + ]: 1713947 : if (offset == 0)
1556 : 1698950 : try_to_release_page(page, 0);
1557 : : out:
1558 : 0 : return;
1559 : : }
1560 : : EXPORT_SYMBOL(block_invalidatepage);
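/*
 * Usage note (sketch, not part of this file): buffer-backed filesystems
 * typically get this behaviour either implicitly - if an address_space
 * supplies no ->invalidatepage, do_invalidatepage() falls back to
 * block_invalidatepage() under CONFIG_BLOCK - or by wiring it up explicitly,
 * as in this hypothetical example:
 */
static const struct address_space_operations myfs_aops = {
	/* ... readpage/writepage and friends elided ... */
	.invalidatepage	= block_invalidatepage,
};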
1561 : :
1562 : :
1563 : : /*
1564 : : * We attach and possibly dirty the buffers atomically wrt
1565 : : * __set_page_dirty_buffers() via private_lock. try_to_free_buffers
1566 : : * is already excluded via the page lock.
1567 : : */
1568 : 0 : void create_empty_buffers(struct page *page,
1569 : : unsigned long blocksize, unsigned long b_state)
1570 : : {
1571 : : struct buffer_head *bh, *head, *tail;
1572 : :
1573 : 1839210 : head = alloc_page_buffers(page, blocksize, 1);
1574 : : bh = head;
1575 : : do {
1576 : 1839196 : bh->b_state |= b_state;
1577 : : tail = bh;
1578 : 1839196 : bh = bh->b_this_page;
1579 [ - + ]: 1839196 : } while (bh);
1580 : 1839196 : tail->b_this_page = head;
1581 : :
1582 : 1839196 : spin_lock(&page->mapping->private_lock);
1583 [ + + ][ + + ]: 1839191 : if (PageUptodate(page) || PageDirty(page)) {
1584 : : bh = head;
1585 : : do {
1586 [ - + ]: 157225 : if (PageDirty(page))
1587 : : set_buffer_dirty(bh);
1588 [ + + ]: 157233 : if (PageUptodate(page))
1589 : : set_buffer_uptodate(bh);
1590 : 157177 : bh = bh->b_this_page;
1591 [ - + ]: 157177 : } while (bh != head);
1592 : : }
1593 : : attach_page_buffers(page, head);
1594 : 1839165 : spin_unlock(&page->mapping->private_lock);
1595 : 1839203 : }
1596 : : EXPORT_SYMBOL(create_empty_buffers);
1597 : :
1598 : : /*
1599 : : * We are taking a block for data and we don't want any output from any
1600 : : * buffer-cache aliases starting from the return of this function and
1601 : : * until the moment when something explicitly marks the buffer
1602 : : * dirty (hopefully that will not happen until we free that block ;-)
1603 : : * We don't even need to mark it not-uptodate - nobody can expect
1604 : : * anything from a newly allocated buffer anyway. We used to use
1605 : : * unmap_buffer() for such invalidation, but that was wrong. We definitely
1606 : : * don't want to mark the alias unmapped, for example - it would confuse
1607 : : * anyone who might pick it with bread() afterwards...
1608 : : *
1609 : : * Also.. Note that bforget() doesn't lock the buffer. So there can
1610 : : * be writeout I/O going on against recently-freed buffers. We don't
1611 : : * wait on that I/O in bforget() - it's more efficient to wait on the I/O
1612 : : * only if we really need to. That happens here.
1613 : : */
1614 : 0 : void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
1615 : : {
1616 : : struct buffer_head *old_bh;
1617 : :
1618 : : might_sleep();
1619 : :
1620 : 2621665 : old_bh = __find_get_block_slow(bdev, block);
1621 [ + + ]: 2621671 : if (old_bh) {
1622 : : clear_buffer_dirty(old_bh);
1623 : : wait_on_buffer(old_bh);
1624 : : clear_buffer_req(old_bh);
1625 : 1957 : __brelse(old_bh);
1626 : : }
1627 : 2621671 : }
1628 : : EXPORT_SYMBOL(unmap_underlying_metadata);
1629 : :
1630 : : /*
1631 : : * Size is a power-of-two in the range 512..PAGE_SIZE,
1632 : : * and the case we care about most is PAGE_SIZE.
1633 : : *
1634 : : * So this *could* possibly be written with those
1635 : : * constraints in mind (relevant mostly if some
1636 : : * architecture has a slow bit-scan instruction)
1637 : : */
1638 : : static inline int block_size_bits(unsigned int blocksize)
1639 : : {
1640 [ - + ][ # # ]: 14423524 : return ilog2(blocksize);
1641 : : }
1642 : :
1643 : 0 : static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
1644 : : {
1645 [ - + ]: 7212009 : BUG_ON(!PageLocked(page));
1646 : :
1647 [ + + ]: 7212009 : if (!page_has_buffers(page))
1648 : 1833648 : create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
1649 [ - + ]: 7211973 : return page_buffers(page);
1650 : : }
1651 : :
1652 : : /*
1653 : : * NOTE! All mapped/uptodate combinations are valid:
1654 : : *
1655 : : * Mapped Uptodate Meaning
1656 : : *
1657 : : * No No "unknown" - must do get_block()
1658 : : * No Yes "hole" - zero-filled
1659 : : * Yes No "allocated" - allocated on disk, not read in
1660 : : * Yes Yes "valid" - allocated and up-to-date in memory.
1661 : : *
1662 : : * "Dirty" is valid only with the last case (mapped+uptodate).
1663 : : */
1664 : :
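/*
 * Illustration added in editing (not part of the kernel source): the table
 * above, read back as code.  buffer_mapped()/buffer_uptodate() are the real
 * state tests; the helper and its string labels are purely for exposition.
 */
static const char *bh_state_name(const struct buffer_head *bh)
{
	if (!buffer_mapped(bh))
		return buffer_uptodate(bh) ? "hole - zero-filled"
					   : "unknown - must do get_block()";
	return buffer_uptodate(bh) ? "valid - allocated and up-to-date"
				   : "allocated - on disk, not read in";
}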
1665 : : /*
1666 : : * While block_write_full_page is writing back the dirty buffers under
1667 : : * the page lock, whoever dirtied the buffers may decide to clean them
1668 : : * again at any time. We handle that by only looking at the buffer
1669 : : * state inside lock_buffer().
1670 : : *
1671 : : * If block_write_full_page() is called for regular writeback
1672 : : * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1673 : : * locked buffer. This can only happen if someone has written the buffer
1674 : : * directly, with submit_bh(). At the address_space level PageWriteback
1675 : : * prevents this contention from occurring.
1676 : : *
1677 : : * If block_write_full_page() is called with wbc->sync_mode ==
1678 : : * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
1679 : : * causes the writes to be flagged as synchronous writes.
1680 : : */
1681 : 0 : static int __block_write_full_page(struct inode *inode, struct page *page,
1682 : : get_block_t *get_block, struct writeback_control *wbc,
1683 : : bh_end_io_t *handler)
1684 : : {
1685 : : int err;
1686 : : sector_t block;
1687 : : sector_t last_block;
1688 : : struct buffer_head *bh, *head;
1689 : : unsigned int blocksize, bbits;
1690 : : int nr_underway = 0;
1691 [ + + ]: 26055 : int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1692 : : WRITE_SYNC : WRITE);
1693 : :
1694 : 26055 : head = create_page_buffers(page, inode,
1695 : : (1 << BH_Dirty)|(1 << BH_Uptodate));
1696 : :
1697 : : /*
1698 : : * Be very careful. We have no exclusion from __set_page_dirty_buffers
1699 : : * here, and the (potentially unmapped) buffers may become dirty at
1700 : : * any time. If a buffer becomes dirty here after we've inspected it
1701 : : * then we just miss that fact, and the page stays dirty.
1702 : : *
1703 : : * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1704 : : * handle that here by just cleaning them.
1705 : : */
1706 : :
1707 : : bh = head;
1708 : 52110 : blocksize = bh->b_size;
1709 : 26055 : bbits = block_size_bits(blocksize);
1710 : :
1711 : 26055 : block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1712 : 26055 : last_block = (i_size_read(inode) - 1) >> bbits;
1713 : :
1714 : : /*
1715 : : * Get all the dirty buffers mapped to disk addresses and
1716 : : * handle any aliases from the underlying blockdev's mapping.
1717 : : */
1718 : : do {
1719 [ - + ]: 26055 : if (block > last_block) {
1720 : : /*
1721 : : * mapped buffers outside i_size will occur, because
1722 : : * this page can be outside i_size when there is a
1723 : : * truncate in progress.
1724 : : */
1725 : : /*
1726 : : * The buffer was zeroed by block_write_full_page()
1727 : : */
1728 : : clear_buffer_dirty(bh);
1729 : : set_buffer_uptodate(bh);
1730 [ + - ][ - + ]: 26055 : } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
[ # # ]
1731 : : buffer_dirty(bh)) {
1732 [ # # ]: 0 : WARN_ON(bh->b_size != blocksize);
1733 : 0 : err = get_block(inode, block, bh, 1);
1734 [ # # ]: 0 : if (err)
1735 : : goto recover;
1736 : : clear_buffer_delay(bh);
1737 [ # # ]: 0 : if (buffer_new(bh)) {
1738 : : /* blockdev mappings never come here */
1739 : : clear_buffer_new(bh);
1740 : 0 : unmap_underlying_metadata(bh->b_bdev,
1741 : : bh->b_blocknr);
1742 : : }
1743 : : }
1744 : 26055 : bh = bh->b_this_page;
1745 : 26055 : block++;
1746 [ - + ]: 26055 : } while (bh != head);
1747 : :
1748 : : do {
1749 [ - + ]: 26055 : if (!buffer_mapped(bh))
1750 : 0 : continue;
1751 : : /*
1752 : : * If it's a fully non-blocking write attempt and we cannot
1753 : : * lock the buffer then redirty the page. Note that this can
1754 : : * potentially cause a busy-wait loop from writeback threads
1755 : : * and kswapd activity, but those code paths have their own
1756 : : * higher-level throttling.
1757 : : */
1758 [ + + ]: 26055 : if (wbc->sync_mode != WB_SYNC_NONE) {
1759 : : lock_buffer(bh);
1760 [ + + ]: 20510 : } else if (!trylock_buffer(bh)) {
1761 : 4 : redirty_page_for_writepage(wbc, page);
1762 : 4 : continue;
1763 : : }
1764 [ + + ]: 26051 : if (test_clear_buffer_dirty(bh)) {
1765 : : mark_buffer_async_write_endio(bh, handler);
1766 : : } else {
1767 : 6199 : unlock_buffer(bh);
1768 : : }
1769 [ - + ]: 26055 : } while ((bh = bh->b_this_page) != head);
1770 : :
1771 : : /*
1772 : : * The page and its buffers are protected by PageWriteback(), so we can
1773 : : * drop the bh refcounts early.
1774 : : */
1775 [ - + ]: 26055 : BUG_ON(PageWriteback(page));
1776 : : set_page_writeback(page);
1777 : :
1778 : : do {
1779 : 26056 : struct buffer_head *next = bh->b_this_page;
1780 [ + + ]: 26056 : if (buffer_async_write(bh)) {
1781 : : submit_bh(write_op, bh);
1782 : 19851 : nr_underway++;
1783 : : }
1784 : : bh = next;
1785 [ - + ]: 26055 : } while (bh != head);
1786 : 26055 : unlock_page(page);
1787 : :
1788 : : err = 0;
1789 : : done:
1790 [ + + ]: 26055 : if (nr_underway == 0) {
1791 : : /*
1792 : : * The page was marked dirty, but the buffers were
1793 : : * clean. Someone wrote them back by hand with
1794 : : * ll_rw_block/submit_bh. A rare case.
1795 : : */
1796 : 6203 : end_page_writeback(page);
1797 : :
1798 : : /*
1799 : : * The page and buffer_heads can be released at any time from
1800 : : * here on.
1801 : : */
1802 : : }
1803 : 26055 : return err;
1804 : :
1805 : : recover:
1806 : : /*
1807 : : * ENOSPC, or some other error. We may already have added some
1808 : : * blocks to the file, so we need to write these out to avoid
1809 : : * exposing stale data.
1810 : : * The page is currently locked and not marked for writeback
1811 : : */
1812 : : bh = head;
1813 : : /* Recovery: lock and submit the mapped buffers */
1814 : : do {
1815 [ # # ][ # # ]: 0 : if (buffer_mapped(bh) && buffer_dirty(bh) &&
[ # # ]
1816 : : !buffer_delay(bh)) {
1817 : : lock_buffer(bh);
1818 : : mark_buffer_async_write_endio(bh, handler);
1819 : : } else {
1820 : : /*
1821 : : * The buffer may have been set dirty during
1822 : : * attachment to a dirty page.
1823 : : */
1824 : : clear_buffer_dirty(bh);
1825 : : }
1826 [ # # ]: 0 : } while ((bh = bh->b_this_page) != head);
1827 : : SetPageError(page);
1828 [ # # ]: 0 : BUG_ON(PageWriteback(page));
1829 : 0 : mapping_set_error(page->mapping, err);
1830 : : set_page_writeback(page);
1831 : : do {
1832 : 0 : struct buffer_head *next = bh->b_this_page;
1833 [ # # ]: 0 : if (buffer_async_write(bh)) {
1834 : : clear_buffer_dirty(bh);
1835 : : submit_bh(write_op, bh);
1836 : 0 : nr_underway++;
1837 : : }
1838 : : bh = next;
1839 [ # # ]: 0 : } while (bh != head);
1840 : 0 : unlock_page(page);
1841 : 0 : goto done;
1842 : : }
1843 : :
1844 : : /*
1845 : : * If a page has any new buffers, zero them out here, and mark them uptodate
1846 : : * and dirty so they'll be written out (in order to prevent uninitialised
1847 : : * block data from leaking), and clear the new bit.
1848 : : */
1849 : 0 : void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1850 : : {
1851 : : unsigned int block_start, block_end;
1852 : : struct buffer_head *head, *bh;
1853 : :
1854 [ - + ]: 8 : BUG_ON(!PageLocked(page));
1855 [ + - ]: 8 : if (!page_has_buffers(page))
1856 : 0 : return;
1857 : :
1858 [ - + ]: 8 : bh = head = page_buffers(page);
1859 : : block_start = 0;
1860 : : do {
1861 : 8 : block_end = block_start + bh->b_size;
1862 : :
1863 [ + + ]: 8 : if (buffer_new(bh)) {
1864 [ + - ]: 2 : if (block_end > from && block_start < to) {
1865 [ + - ]: 2 : if (!PageUptodate(page)) {
1866 : : unsigned start, size;
1867 : :
1868 : 2 : start = max(from, block_start);
1869 : 2 : size = min(to, block_end) - start;
1870 : :
1871 : : zero_user(page, start, size);
1872 : : set_buffer_uptodate(bh);
1873 : : }
1874 : :
1875 : : clear_buffer_new(bh);
1876 : 2 : mark_buffer_dirty(bh);
1877 : : }
1878 : : }
1879 : :
1880 : : block_start = block_end;
1881 : 8 : bh = bh->b_this_page;
1882 [ - + ]: 8 : } while (bh != head);
1883 : : }
1884 : : EXPORT_SYMBOL(page_zero_new_buffers);
1885 : :
1886 : 0 : int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1887 : : get_block_t *get_block)
1888 : : {
1889 : 7184195 : unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1890 : 7184195 : unsigned to = from + len;
1891 : 7184195 : struct inode *inode = page->mapping->host;
1892 : : unsigned block_start, block_end;
1893 : : sector_t block;
1894 : : int err = 0;
1895 : : unsigned blocksize, bbits;
1896 : : struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1897 : :
1898 [ - + ]: 7184195 : BUG_ON(!PageLocked(page));
1899 : : BUG_ON(from > PAGE_CACHE_SIZE);
1900 [ - + ]: 7184195 : BUG_ON(to > PAGE_CACHE_SIZE);
1901 [ - + ]: 7184195 : BUG_ON(from > to);
1902 : :
1903 : 7184195 : head = create_page_buffers(page, inode, 0);
1904 : 14367910 : blocksize = head->b_size;
1905 : 7183715 : bbits = block_size_bits(blocksize);
1906 : :
1907 : 7183715 : block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
1908 : :
1909 [ + + ]: 14367747 : for(bh = head, block_start = 0; bh != head || !block_start;
1910 : 7184032 : block++, block_start=block_end, bh = bh->b_this_page) {
1911 : 7183947 : block_end = block_start + blocksize;
1912 [ + ]: 7183947 : if (block_end <= from || block_start >= to) {
1913 [ - ]: 0 : if (PageUptodate(page)) {
1914 [ # # ]: 0 : if (!buffer_uptodate(bh))
1915 : : set_buffer_uptodate(bh);
1916 : : }
1917 : 0 : continue;
1918 : : }
1919 [ - + ]: 7184009 : if (buffer_new(bh))
1920 : : clear_buffer_new(bh);
1921 [ + + ]: 7184375 : if (!buffer_mapped(bh)) {
1922 [ - + ]: 1836013 : WARN_ON(bh->b_size != blocksize);
1923 : 1836013 : err = get_block(inode, block, bh, 1);
1924 [ + + ]: 9019809 : if (err)
1925 : : break;
1926 [ + ]: 1835904 : if (buffer_new(bh)) {
1927 : 1835907 : unmap_underlying_metadata(bh->b_bdev,
1928 : : bh->b_blocknr);
1929 [ + + ]: 1835959 : if (PageUptodate(page)) {
1930 : 157883 : clear_buffer_new(bh);
1931 : 157842 : set_buffer_uptodate(bh);
1932 : 157901 : mark_buffer_dirty(bh);
1933 : 157909 : continue;
1934 : : }
1935 [ + + ]: 1678076 : if (block_end > to || block_start < from)
1936 : : zero_user_segments(page,
1937 : : to, block_end,
1938 : : block_start, from);
1939 : 1678081 : continue;
1940 : : }
1941 : : }
1942 [ + + ]: 5348189 : if (PageUptodate(page)) {
1943 [ - + ]: 5348171 : if (!buffer_uptodate(bh))
1944 : : set_buffer_uptodate(bh);
1945 : 5348163 : continue;
1946 : : }
1947 [ + - ][ + - ]: 18 : if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
[ + - ]
1948 [ + - ]: 18 : !buffer_unwritten(bh) &&
1949 : 18 : (block_start < from || block_end > to)) {
1950 : 18 : ll_rw_block(READ, 1, &bh);
1951 : 18 : *wait_bh++=bh;
1952 : : }
1953 : : }
1954 : : /*
1955 : : * If we issued read requests - let them complete.
1956 : : */
1957 [ + + ]: 7183923 : while(wait_bh > wait) {
1958 : 18 : wait_on_buffer(*--wait_bh);
1959 [ - + ]: 18 : if (!buffer_uptodate(*wait_bh))
1960 : : err = -EIO;
1961 : : }
1962 [ - + ]: 7183905 : if (unlikely(err))
1963 : 0 : page_zero_new_buffers(page, from, to);
1964 : 7183905 : return err;
1965 : : }
1966 : : EXPORT_SYMBOL(__block_write_begin);
1967 : :
1968 : 0 : static int __block_commit_write(struct inode *inode, struct page *page,
1969 : : unsigned from, unsigned to)
1970 : : {
1971 : : unsigned block_start, block_end;
1972 : : int partial = 0;
1973 : : unsigned blocksize;
1974 : : struct buffer_head *bh, *head;
1975 : :
1976 [ - + ]: 7176667 : bh = head = page_buffers(page);
1977 : 7176667 : blocksize = bh->b_size;
1978 : :
1979 : : block_start = 0;
1980 : : do {
1981 : 7176681 : block_end = block_start + blocksize;
1982 [ + + ]: 7176681 : if (block_end <= from || block_start >= to) {
1983 [ - + ]: 2 : if (!buffer_uptodate(bh))
1984 : : partial = 1;
1985 : : } else {
1986 : : set_buffer_uptodate(bh);
1987 : 7176718 : mark_buffer_dirty(bh);
1988 : : }
1989 : : clear_buffer_new(bh);
1990 : :
1991 : : block_start = block_end;
1992 : 7176766 : bh = bh->b_this_page;
1993 [ + + ]: 7176766 : } while (bh != head);
1994 : :
1995 : : /*
1996 : : * If this is a partial write which happened to make all buffers
1997 : : * uptodate then we can optimize away a bogus readpage() for
1998 : : * the next read(). Here we 'discover' whether the page went
1999 : : * uptodate as a result of this (potentially partial) write.
2000 : : */
2001 [ + + ]: 7176752 : if (!partial)
2002 : : SetPageUptodate(page);
2003 : 7176689 : return 0;
2004 : : }
2005 : :
2006 : : /*
2007 : : * block_write_begin takes care of the basic task of block allocation and
2008 : : * bringing partial write blocks uptodate first.
2009 : : *
2010 : : * The filesystem needs to handle block truncation upon failure.
2011 : : */
2012 : 0 : int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2013 : : unsigned flags, struct page **pagep, get_block_t *get_block)
2014 : : {
2015 : 0 : pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2016 : : struct page *page;
2017 : : int status;
2018 : :
2019 : 0 : page = grab_cache_page_write_begin(mapping, index, flags);
2020 [ # # ]: 0 : if (!page)
2021 : : return -ENOMEM;
2022 : :
2023 : 0 : status = __block_write_begin(page, pos, len, get_block);
2024 [ # # ]: 0 : if (unlikely(status)) {
2025 : 0 : unlock_page(page);
2026 : 0 : page_cache_release(page);
2027 : : page = NULL;
2028 : : }
2029 : :
2030 : 0 : *pagep = page;
2031 : 0 : return status;
2032 : : }
2033 : : EXPORT_SYMBOL(block_write_begin);
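/*
 * Illustration added in editing (not part of the kernel source): a minimal
 * ->write_begin for a hypothetical "myfs", built on block_write_begin() and
 * meant to pair with generic_write_end() as ->write_end.  myfs_get_block is
 * a made-up mapping callback (here a trivial 1:1 mapping); a real filesystem
 * would look up or allocate blocks, and must truncate blocks instantiated
 * beyond i_size if this returns an error.
 */
static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh, int create)
{
	/* hypothetical layout: file block N lives at device block N */
	map_bh(bh, inode->i_sb, iblock);
	return 0;
}

static int myfs_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
{
	return block_write_begin(mapping, pos, len, flags, pagep,
				 myfs_get_block);
}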
2034 : :
2035 : 0 : int block_write_end(struct file *file, struct address_space *mapping,
2036 : : loff_t pos, unsigned len, unsigned copied,
2037 : : struct page *page, void *fsdata)
2038 : : {
2039 : : struct inode *inode = mapping->host;
2040 : : unsigned start;
2041 : :
2042 : 6912239 : start = pos & (PAGE_CACHE_SIZE - 1);
2043 : :
2044 [ + + ]: 6912239 : if (unlikely(copied < len)) {
2045 : : /*
2046 : : * The buffers that were written will now be uptodate, so we
2047 : : * don't have to worry about a readpage reading them and
2048 : : * overwriting a partial write. However if we have encountered
2049 : : * a short write and only partially written into a buffer, it
2050 : : * will not be marked uptodate, so a readpage might come in and
2051 : : * destroy our partial write.
2052 : : *
2053 : : * Do the simplest thing, and just treat any short write to a
2054 : : * non uptodate page as a zero-length write, and force the
2055 : : * caller to redo the whole thing.
2056 : : */
2057 [ + + ]: 6912247 : if (!PageUptodate(page))
2058 : : copied = 0;
2059 : :
2060 : 8 : page_zero_new_buffers(page, start+copied, start+len);
2061 : : }
2062 : 6912239 : flush_dcache_page(page);
2063 : :
2064 : : /* This could be a short (even 0-length) commit */
2065 : 6912171 : __block_commit_write(inode, page, start, start+copied);
2066 : :
2067 : 6912257 : return copied;
2068 : : }
2069 : : EXPORT_SYMBOL(block_write_end);
2070 : :
2071 : 0 : int generic_write_end(struct file *file, struct address_space *mapping,
2072 : : loff_t pos, unsigned len, unsigned copied,
2073 : : struct page *page, void *fsdata)
2074 : : {
2075 : 6912344 : struct inode *inode = mapping->host;
2076 : : int i_size_changed = 0;
2077 : :
2078 : 6912344 : copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
2079 : :
2080 : : /*
2081 : : * No need to use i_size_read() here, the i_size
2082 : : * cannot change under us because we hold i_mutex.
2083 : : *
2084 : : * But it's important to update i_size while still holding page lock:
2085 : : * page writeout could otherwise come in and zero beyond i_size.
2086 : : */
2087 [ + + ]: 6912288 : if (pos+copied > inode->i_size) {
2088 : : i_size_write(inode, pos+copied);
2089 : : i_size_changed = 1;
2090 : : }
2091 : :
2092 : 6912289 : unlock_page(page);
2093 : 6912360 : page_cache_release(page);
2094 : :
2095 : : /*
2096 : : * Don't mark the inode dirty under page lock. First, it unnecessarily
2097 : : * makes the holding time of page lock longer. Second, it forces lock
2098 : : * ordering of page lock and transaction start for journaling
2099 : : * filesystems.
2100 : : */
2101 [ + + ]: 6912372 : if (i_size_changed)
2102 : : mark_inode_dirty(inode);
2103 : :
2104 : 6912363 : return copied;
2105 : : }
2106 : : EXPORT_SYMBOL(generic_write_end);
2107 : :
2108 : : /*
2109 : : * block_is_partially_uptodate checks whether buffers within a page are
2110 : : * uptodate or not.
2111 : : *
2112 : : * Returns true if all buffers which correspond to a file portion
2113 : : * we want to read are uptodate.
2114 : : */
2115 : 0 : int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
2116 : : unsigned long from)
2117 : : {
2118 : : unsigned block_start, block_end, blocksize;
2119 : : unsigned to;
2120 : : struct buffer_head *bh, *head;
2121 : : int ret = 1;
2122 : :
2123 [ # # ]: 0 : if (!page_has_buffers(page))
2124 : : return 0;
2125 : :
2126 [ # # ]: 0 : head = page_buffers(page);
2127 : 0 : blocksize = head->b_size;
2128 : 0 : to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
2129 : 0 : to = from + to;
2130 [ # # ][ # # ]: 0 : if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2131 : : return 0;
2132 : :
2133 : : bh = head;
2134 : : block_start = 0;
2135 : : do {
2136 : 0 : block_end = block_start + blocksize;
2137 [ # # ]: 0 : if (block_end > from && block_start < to) {
2138 [ # # ]: 0 : if (!buffer_uptodate(bh)) {
2139 : : ret = 0;
2140 : : break;
2141 : : }
2142 [ # # ]: 0 : if (block_end >= to)
2143 : : break;
2144 : : }
2145 : : block_start = block_end;
2146 : 0 : bh = bh->b_this_page;
2147 [ # # ]: 0 : } while (bh != head);
2148 : :
2149 : 0 : return ret;
2150 : : }
2151 : : EXPORT_SYMBOL(block_is_partially_uptodate);
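/*
 * Illustration added in editing (not part of the kernel source): filesystems
 * normally use this helper verbatim as their method, e.g.
 *
 *	.is_partially_uptodate = block_is_partially_uptodate,
 *
 * in their address_space_operations, so that a read fully covered by
 * already-uptodate buffers does not force a readpage of the whole page.
 */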
2152 : :
2153 : : /*
2154 : : * Generic "read page" function for block devices that have the normal
2155 : : * get_block functionality. This covers most block-device-backed filesystems.
2156 : : * Reads the page asynchronously --- the unlock_buffer() and
2157 : : * set/clear_buffer_uptodate() functions propagate buffer state into the
2158 : : * page struct once IO has completed.
2159 : : */
2160 : 0 : int block_read_full_page(struct page *page, get_block_t *get_block)
2161 : : {
2162 : 1752 : struct inode *inode = page->mapping->host;
2163 : : sector_t iblock, lblock;
2164 : : struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
2165 : : unsigned int blocksize, bbits;
2166 : : int nr, i;
2167 : : int fully_mapped = 1;
2168 : :
2169 : 1752 : head = create_page_buffers(page, inode, 0);
2170 : 3504 : blocksize = head->b_size;
2171 : 1752 : bbits = block_size_bits(blocksize);
2172 : :
2173 : 1752 : iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
2174 : 1752 : lblock = (i_size_read(inode)+blocksize-1) >> bbits;
2175 : : bh = head;
2176 : : nr = 0;
2177 : : i = 0;
2178 : :
2179 : : do {
2180 [ - + ]: 1752 : if (buffer_uptodate(bh))
2181 : 0 : continue;
2182 : :
2183 [ + - ]: 1752 : if (!buffer_mapped(bh)) {
2184 : : int err = 0;
2185 : :
2186 : : fully_mapped = 0;
2187 [ + - ]: 1752 : if (iblock < lblock) {
2188 [ - + ]: 1752 : WARN_ON(bh->b_size != blocksize);
2189 : 1752 : err = get_block(inode, iblock, bh, 0);
2190 [ - + ]: 1752 : if (err)
2191 : : SetPageError(page);
2192 : : }
2193 [ + + ]: 1752 : if (!buffer_mapped(bh)) {
2194 : 1084 : zero_user(page, i * blocksize, blocksize);
2195 [ + - ]: 1084 : if (!err)
2196 : : set_buffer_uptodate(bh);
2197 : 1084 : continue;
2198 : : }
2199 : : /*
2200 : : * get_block() might have updated the buffer
2201 : : * synchronously
2202 : : */
2203 [ - + ]: 668 : if (buffer_uptodate(bh))
2204 : 0 : continue;
2205 : : }
2206 : 668 : arr[nr++] = bh;
2207 [ - + ]: 1752 : } while (i++, iblock++, (bh = bh->b_this_page) != head);
2208 : :
2209 [ - + ]: 1752 : if (fully_mapped)
2210 : : SetPageMappedToDisk(page);
2211 : :
2212 [ + + ]: 1752 : if (!nr) {
2213 : : /*
2214 : : * All buffers are uptodate - we can set the page uptodate
2215 : : * as well. But not if get_block() returned an error.
2216 : : */
2217 [ + - ]: 1084 : if (!PageError(page))
2218 : : SetPageUptodate(page);
2219 : 1084 : unlock_page(page);
2220 : 1084 : return 0;
2221 : : }
2222 : :
2223 : : /* Stage two: lock the buffers */
2224 [ + + ]: 1336 : for (i = 0; i < nr; i++) {
2225 : 668 : bh = arr[i];
2226 : : lock_buffer(bh);
2227 : : mark_buffer_async_read(bh);
2228 : : }
2229 : :
2230 : : /*
2231 : : * Stage 3: start the IO. Check for uptodateness
2232 : : * inside the buffer lock in case another process reading
2233 : : * the underlying blockdev brought it uptodate (the sct fix).
2234 : : */
2235 [ + + ]: 1336 : for (i = 0; i < nr; i++) {
2236 : 668 : bh = arr[i];
2237 [ - + ]: 668 : if (buffer_uptodate(bh))
2238 : 0 : end_buffer_async_read(bh, 1);
2239 : : else
2240 : : submit_bh(READ, bh);
2241 : : }
2242 : : return 0;
2243 : : }
2244 : : EXPORT_SYMBOL(block_read_full_page);
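/*
 * Illustration added in editing (not part of the kernel source): the typical
 * ->readpage of a buffer-backed filesystem is a one-line wrapper around this
 * helper, using the same hypothetical myfs_get_block sketched earlier.
 */
static int myfs_readpage(struct file *file, struct page *page)
{
	return block_read_full_page(page, myfs_get_block);
}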
2245 : :
2246 : : /* utility function for filesystems that need to do work on expanding
2247 : : * truncates. Uses filesystem pagecache writes to allow the filesystem to
2248 : : * deal with the hole.
2249 : : */
2250 : 0 : int generic_cont_expand_simple(struct inode *inode, loff_t size)
2251 : : {
2252 : 0 : struct address_space *mapping = inode->i_mapping;
2253 : : struct page *page;
2254 : : void *fsdata;
2255 : : int err;
2256 : :
2257 : 0 : err = inode_newsize_ok(inode, size);
2258 [ # # ]: 0 : if (err)
2259 : : goto out;
2260 : :
2261 : 0 : err = pagecache_write_begin(NULL, mapping, size, 0,
2262 : : AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
2263 : : &page, &fsdata);
2264 [ # # ]: 0 : if (err)
2265 : : goto out;
2266 : :
2267 : 0 : err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
2268 [ # # ]: 0 : BUG_ON(err > 0);
2269 : :
2270 : : out:
2271 : 0 : return err;
2272 : : }
2273 : : EXPORT_SYMBOL(generic_cont_expand_simple);
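/*
 * Illustration added in editing (not part of the kernel source): a sketch of
 * how a filesystem's setattr path might use generic_cont_expand_simple() for
 * an expanding truncate before committing the new size.  "myfs" is
 * hypothetical and locking/journalling details are omitted.
 */
static int myfs_expand_size(struct inode *inode, loff_t newsize)
{
	int err = 0;

	if (newsize > inode->i_size)
		/* zero-fill out to the new EOF via ordinary pagecache writes */
		err = generic_cont_expand_simple(inode, newsize);
	if (!err)
		truncate_setsize(inode, newsize);
	return err;
}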
2274 : :
2275 : 0 : static int cont_expand_zero(struct file *file, struct address_space *mapping,
2276 : : loff_t pos, loff_t *bytes)
2277 : : {
2278 : 0 : struct inode *inode = mapping->host;
2279 : 0 : unsigned blocksize = 1 << inode->i_blkbits;
2280 : : struct page *page;
2281 : : void *fsdata;
2282 : : pgoff_t index, curidx;
2283 : : loff_t curpos;
2284 : : unsigned zerofrom, offset, len;
2285 : : int err = 0;
2286 : :
2287 : 0 : index = pos >> PAGE_CACHE_SHIFT;
2288 : 0 : offset = pos & ~PAGE_CACHE_MASK;
2289 : :
2290 [ # # ]: 0 : while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
2291 : 0 : zerofrom = curpos & ~PAGE_CACHE_MASK;
2292 [ # # ]: 0 : if (zerofrom & (blocksize-1)) {
2293 : 0 : *bytes |= (blocksize-1);
2294 : 0 : (*bytes)++;
2295 : : }
2296 : 0 : len = PAGE_CACHE_SIZE - zerofrom;
2297 : :
2298 : 0 : err = pagecache_write_begin(file, mapping, curpos, len,
2299 : : AOP_FLAG_UNINTERRUPTIBLE,
2300 : : &page, &fsdata);
2301 [ # # ]: 0 : if (err)
2302 : : goto out;
2303 : 0 : zero_user(page, zerofrom, len);
2304 : 0 : err = pagecache_write_end(file, mapping, curpos, len, len,
2305 : : page, fsdata);
2306 [ # # ]: 0 : if (err < 0)
2307 : : goto out;
2308 [ # # ]: 0 : BUG_ON(err != len);
2309 : : err = 0;
2310 : :
2311 : 0 : balance_dirty_pages_ratelimited(mapping);
2312 : : }
2313 : :
2314 : : /* page covers the boundary, find the boundary offset */
2315 [ # # ]: 0 : if (index == curidx) {
2316 : 0 : zerofrom = curpos & ~PAGE_CACHE_MASK;
2317 : : /* if we will expand the thing last block will be filled */
2318 [ # # ]: 0 : if (offset <= zerofrom) {
2319 : : goto out;
2320 : : }
2321 [ # # ]: 0 : if (zerofrom & (blocksize-1)) {
2322 : 0 : *bytes |= (blocksize-1);
2323 : 0 : (*bytes)++;
2324 : : }
2325 : 0 : len = offset - zerofrom;
2326 : :
2327 : 0 : err = pagecache_write_begin(file, mapping, curpos, len,
2328 : : AOP_FLAG_UNINTERRUPTIBLE,
2329 : : &page, &fsdata);
2330 [ # # ]: 0 : if (err)
2331 : : goto out;
2332 : 0 : zero_user(page, zerofrom, len);
2333 : 0 : err = pagecache_write_end(file, mapping, curpos, len, len,
2334 : : page, fsdata);
2335 [ # # ]: 0 : if (err < 0)
2336 : : goto out;
2337 [ # # ]: 0 : BUG_ON(err != len);
2338 : : err = 0;
2339 : : }
2340 : : out:
2341 : 0 : return err;
2342 : : }
2343 : :
2344 : : /*
2345 : : * For moronic filesystems that do not allow holes in a file.
2346 : : * We may have to extend the file.
2347 : : */
2348 : 0 : int cont_write_begin(struct file *file, struct address_space *mapping,
2349 : : loff_t pos, unsigned len, unsigned flags,
2350 : : struct page **pagep, void **fsdata,
2351 : : get_block_t *get_block, loff_t *bytes)
2352 : : {
2353 : 0 : struct inode *inode = mapping->host;
2354 : 0 : unsigned blocksize = 1 << inode->i_blkbits;
2355 : : unsigned zerofrom;
2356 : : int err;
2357 : :
2358 : 0 : err = cont_expand_zero(file, mapping, pos, bytes);
2359 [ # # ]: 0 : if (err)
2360 : : return err;
2361 : :
2362 : 0 : zerofrom = *bytes & ~PAGE_CACHE_MASK;
2363 [ # # ][ # # ]: 0 : if (pos+len > *bytes && zerofrom & (blocksize-1)) {
2364 : 0 : *bytes |= (blocksize-1);
2365 : 0 : (*bytes)++;
2366 : : }
2367 : :
2368 : 0 : return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2369 : : }
2370 : : EXPORT_SYMBOL(cont_write_begin);
2371 : :
2372 : 0 : int block_commit_write(struct page *page, unsigned from, unsigned to)
2373 : : {
2374 : : struct inode *inode = page->mapping->host;
2375 : 264357 : __block_commit_write(inode,page,from,to);
2376 : 0 : return 0;
2377 : : }
2378 : : EXPORT_SYMBOL(block_commit_write);
2379 : :
2380 : : /*
2381 : : * block_page_mkwrite() is not allowed to change the file size as it gets
2382 : : * called from a page fault handler when a page is first dirtied. Hence we must
2383 : : * be careful to check for EOF conditions here. We set the page up correctly
2384 : : * for a written page which means we get ENOSPC checking when writing into
2385 : : * holes and correct delalloc and unwritten extent mapping on filesystems that
2386 : : * support these features.
2387 : : *
2388 : : * We are not allowed to take the i_mutex here so we have to play games to
2389 : : * protect against truncate races as the page could now be beyond EOF. Because
2390 : : * truncate writes the inode size before removing pages, once we have the
2391 : : * page lock we can determine safely if the page is beyond EOF. If it is not
2392 : : * beyond EOF, then the page is guaranteed safe against truncation until we
2393 : : * unlock the page.
2394 : : *
2395 : : * Direct callers of this function should protect against filesystem freezing
2396 : : * using sb_start_write() - sb_end_write() functions.
2397 : : */
2398 : 0 : int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2399 : : get_block_t get_block)
2400 : : {
2401 : 528750 : struct page *page = vmf->page;
2402 : 264373 : struct inode *inode = file_inode(vma->vm_file);
2403 : : unsigned long end;
2404 : : loff_t size;
2405 : : int ret;
2406 : :
2407 : : lock_page(page);
2408 : : size = i_size_read(inode);
2409 [ + ][ + - ]: 264373 : if ((page->mapping != inode->i_mapping) ||
2410 : : (page_offset(page) > size)) {
2411 : : /* We overload EFAULT to mean page got truncated */
2412 : : ret = -EFAULT;
2413 : : goto out_unlock;
2414 : : }
2415 : :
2416 : : /* page is wholly or partially inside EOF */
2417 [ + + ]: 264377 : if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
2418 : 92283 : end = size & ~PAGE_CACHE_MASK;
2419 : : else
2420 : : end = PAGE_CACHE_SIZE;
2421 : :
2422 : 264377 : ret = __block_write_begin(page, 0, end, get_block);
2423 [ + + ]: 264359 : if (!ret)
2424 : : ret = block_commit_write(page, 0, end);
2425 : :
2426 [ + - ]: 264382 : if (unlikely(ret < 0))
2427 : : goto out_unlock;
2428 : 264382 : set_page_dirty(page);
2429 : 264384 : wait_for_stable_page(page);
2430 : 264371 : return 0;
2431 : : out_unlock:
2432 : 0 : unlock_page(page);
2433 : 0 : return ret;
2434 : : }
2435 : : EXPORT_SYMBOL(__block_page_mkwrite);
2436 : :
2437 : 0 : int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2438 : : get_block_t get_block)
2439 : : {
2440 : : int ret;
2441 : 0 : struct super_block *sb = file_inode(vma->vm_file)->i_sb;
2442 : :
2443 : : sb_start_pagefault(sb);
2444 : :
2445 : : /*
2446 : : * Update file times before taking page lock. We may end up failing the
2447 : : * fault so this update may be superfluous but who really cares...
2448 : : */
2449 : 0 : file_update_time(vma->vm_file);
2450 : :
2451 : 0 : ret = __block_page_mkwrite(vma, vmf, get_block);
2452 : : sb_end_pagefault(sb);
2453 : 0 : return block_page_mkwrite_return(ret);
2454 : : }
2455 : : EXPORT_SYMBOL(block_page_mkwrite);
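/*
 * Illustration added in editing (not part of the kernel source): a
 * hypothetical filesystem can use block_page_mkwrite() directly from its
 * ->page_mkwrite handler; the helper already converts the result with
 * block_page_mkwrite_return(), so the VM_FAULT_* value can be returned as-is.
 */
static int myfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	return block_page_mkwrite(vma, vmf, myfs_get_block);
}

static const struct vm_operations_struct myfs_file_vm_ops = {
	.fault		= filemap_fault,
	.page_mkwrite	= myfs_page_mkwrite,
};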
2456 : :
2457 : : /*
2458 : : * nobh_write_begin()'s prereads are special: the buffer_heads are freed
2459 : : * immediately, while under the page lock. So it needs a special end_io
2460 : : * handler which does not touch the bh after unlocking it.
2461 : : */
2462 : 0 : static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
2463 : : {
2464 : 0 : __end_buffer_read_notouch(bh, uptodate);
2465 : 0 : }
2466 : :
2467 : : /*
2468 : : * Attach the singly-linked list of buffers created by nobh_write_begin to
2469 : : * the page (converting it to a circular linked list and taking care of page
2470 : : * dirty races).
2471 : : */
2472 : 0 : static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2473 : : {
2474 : : struct buffer_head *bh;
2475 : :
2476 [ # # ]: 0 : BUG_ON(!PageLocked(page));
2477 : :
2478 : 0 : spin_lock(&page->mapping->private_lock);
2479 : : bh = head;
2480 : : do {
2481 [ # # ]: 0 : if (PageDirty(page))
2482 : : set_buffer_dirty(bh);
2483 [ # # ]: 0 : if (!bh->b_this_page)
2484 : 0 : bh->b_this_page = head;
2485 : 0 : bh = bh->b_this_page;
2486 [ # # ]: 0 : } while (bh != head);
2487 : : attach_page_buffers(page, head);
2488 : 0 : spin_unlock(&page->mapping->private_lock);
2489 : 0 : }
2490 : :
2491 : : /*
2492 : : * On entry, the page is fully not uptodate.
2493 : : * On exit the page is fully uptodate in the areas outside (from,to)
2494 : : * The filesystem needs to handle block truncation upon failure.
2495 : : */
2496 : 0 : int nobh_write_begin(struct address_space *mapping,
2497 : : loff_t pos, unsigned len, unsigned flags,
2498 : : struct page **pagep, void **fsdata,
2499 : : get_block_t *get_block)
2500 : : {
2501 : 0 : struct inode *inode = mapping->host;
2502 : 0 : const unsigned blkbits = inode->i_blkbits;
2503 : 0 : const unsigned blocksize = 1 << blkbits;
2504 : : struct buffer_head *head, *bh;
2505 : : struct page *page;
2506 : : pgoff_t index;
2507 : : unsigned from, to;
2508 : : unsigned block_in_page;
2509 : : unsigned block_start, block_end;
2510 : : sector_t block_in_file;
2511 : : int nr_reads = 0;
2512 : : int ret = 0;
2513 : : int is_mapped_to_disk = 1;
2514 : :
2515 : 0 : index = pos >> PAGE_CACHE_SHIFT;
2516 : 0 : from = pos & (PAGE_CACHE_SIZE - 1);
2517 : 0 : to = from + len;
2518 : :
2519 : 0 : page = grab_cache_page_write_begin(mapping, index, flags);
2520 [ # # ]: 0 : if (!page)
2521 : : return -ENOMEM;
2522 : 0 : *pagep = page;
2523 : 0 : *fsdata = NULL;
2524 : :
2525 [ # # ]: 0 : if (page_has_buffers(page)) {
2526 : 0 : ret = __block_write_begin(page, pos, len, get_block);
2527 [ # # ]: 0 : if (unlikely(ret))
2528 : : goto out_release;
2529 : : return ret;
2530 : : }
2531 : :
2532 [ # # ]: 0 : if (PageMappedToDisk(page))
2533 : : return 0;
2534 : :
2535 : : /*
2536 : : * Allocate buffers so that we can keep track of state, and potentially
2537 : : * attach them to the page if an error occurs. In the common case of
2538 : : * no error, they will just be freed again without ever being attached
2539 : : * to the page (which is all OK, because we're under the page lock).
2540 : : *
2541 : : * Be careful: the buffer linked list is a NULL terminated one, rather
2542 : : * than the circular one we're used to.
2543 : : */
2544 : 0 : head = alloc_page_buffers(page, blocksize, 0);
2545 [ # # ]: 0 : if (!head) {
2546 : : ret = -ENOMEM;
2547 : : goto out_release;
2548 : : }
2549 : :
2550 : 0 : block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
2551 : :
2552 : : /*
2553 : : * We loop across all blocks in the page, whether or not they are
2554 : : * part of the affected region. This is so we can discover if the
2555 : : * page is fully mapped-to-disk.
2556 : : */
2557 [ # # ]: 0 : for (block_start = 0, block_in_page = 0, bh = head;
2558 : : block_start < PAGE_CACHE_SIZE;
2559 : 0 : block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
2560 : : int create;
2561 : :
2562 : 0 : block_end = block_start + blocksize;
2563 : 0 : bh->b_state = 0;
2564 : : create = 1;
2565 [ # # ]: 0 : if (block_start >= to)
2566 : : create = 0;
2567 : 0 : ret = get_block(inode, block_in_file + block_in_page,
2568 : : bh, create);
2569 [ # # ]: 0 : if (ret)
2570 : : goto failed;
2571 [ # # ]: 0 : if (!buffer_mapped(bh))
2572 : : is_mapped_to_disk = 0;
2573 [ # # ]: 0 : if (buffer_new(bh))
2574 : 0 : unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
2575 [ # # ]: 0 : if (PageUptodate(page)) {
2576 : : set_buffer_uptodate(bh);
2577 : 0 : continue;
2578 : : }
2579 [ # # ][ # # ]: 0 : if (buffer_new(bh) || !buffer_mapped(bh)) {
2580 : : zero_user_segments(page, block_start, from,
2581 : : to, block_end);
2582 : 0 : continue;
2583 : : }
2584 [ # # ]: 0 : if (buffer_uptodate(bh))
2585 : 0 : continue; /* reiserfs does this */
2586 [ # # ]: 0 : if (block_start < from || block_end > to) {
2587 : : lock_buffer(bh);
2588 : 0 : bh->b_end_io = end_buffer_read_nobh;
2589 : : submit_bh(READ, bh);
2590 : 0 : nr_reads++;
2591 : : }
2592 : : }
2593 : :
2594 [ # # ]: 0 : if (nr_reads) {
2595 : : /*
2596 : : * The page is locked, so these buffers are protected from
2597 : : * any VM or truncate activity. Hence we don't need to care
2598 : : * for the buffer_head refcounts.
2599 : : */
2600 [ # # ]: 0 : for (bh = head; bh; bh = bh->b_this_page) {
2601 : : wait_on_buffer(bh);
2602 [ # # ]: 0 : if (!buffer_uptodate(bh))
2603 : : ret = -EIO;
2604 : : }
2605 [ # # ]: 0 : if (ret)
2606 : : goto failed;
2607 : : }
2608 : :
2609 [ # # ]: 0 : if (is_mapped_to_disk)
2610 : : SetPageMappedToDisk(page);
2611 : :
2612 : 0 : *fsdata = head; /* to be released by nobh_write_end */
2613 : :
2614 : 0 : return 0;
2615 : :
2616 : : failed:
2617 [ # # ]: 0 : BUG_ON(!ret);
2618 : : /*
2619 : : * Error recovery is a bit difficult. We need to zero out blocks that
2620 : : * were newly allocated, and dirty them to ensure they get written out.
2621 : : * Buffers need to be attached to the page at this point, otherwise
2622 : : * the handling of potential IO errors during writeout would be hard
2623 : : * (could try doing synchronous writeout, but what if that fails too?)
2624 : : */
2625 : 0 : attach_nobh_buffers(page, head);
2626 : 0 : page_zero_new_buffers(page, from, to);
2627 : :
2628 : : out_release:
2629 : 0 : unlock_page(page);
2630 : 0 : page_cache_release(page);
2631 : 0 : *pagep = NULL;
2632 : :
2633 : 0 : return ret;
2634 : : }
2635 : : EXPORT_SYMBOL(nobh_write_begin);
2636 : :
2637 : 0 : int nobh_write_end(struct file *file, struct address_space *mapping,
2638 : : loff_t pos, unsigned len, unsigned copied,
2639 : : struct page *page, void *fsdata)
2640 : : {
2641 : 0 : struct inode *inode = page->mapping->host;
2642 : : struct buffer_head *head = fsdata;
2643 : : struct buffer_head *bh;
2644 [ # # ][ # # ]: 0 : BUG_ON(fsdata != NULL && page_has_buffers(page));
2645 : :
2646 [ # # ][ # # ]: 0 : if (unlikely(copied < len) && head)
2647 : 0 : attach_nobh_buffers(page, head);
2648 [ # # ]: 0 : if (page_has_buffers(page))
2649 : 0 : return generic_write_end(file, mapping, pos, len,
2650 : : copied, page, fsdata);
2651 : :
2652 : : SetPageUptodate(page);
2653 : 0 : set_page_dirty(page);
2654 [ # # ]: 0 : if (pos+copied > inode->i_size) {
2655 : : i_size_write(inode, pos+copied);
2656 : : mark_inode_dirty(inode);
2657 : : }
2658 : :
2659 : 0 : unlock_page(page);
2660 : 0 : page_cache_release(page);
2661 : :
2662 [ # # ]: 0 : while (head) {
2663 : : bh = head;
2664 : 0 : head = head->b_this_page;
2665 : 0 : free_buffer_head(bh);
2666 : : }
2667 : :
2668 : 0 : return copied;
2669 : : }
2670 : : EXPORT_SYMBOL(nobh_write_end);
2671 : :
2672 : : /*
2673 : : * nobh_writepage() - based on block_write_full_page() except
2674 : : * that it tries to operate without attaching bufferheads to
2675 : : * the page.
2676 : : */
2677 : 0 : int nobh_writepage(struct page *page, get_block_t *get_block,
2678 : : struct writeback_control *wbc)
2679 : : {
2680 : 0 : struct inode * const inode = page->mapping->host;
2681 : : loff_t i_size = i_size_read(inode);
2682 : 0 : const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2683 : : unsigned offset;
2684 : : int ret;
2685 : :
2686 : : /* Is the page fully inside i_size? */
2687 [ # # ]: 0 : if (page->index < end_index)
2688 : : goto out;
2689 : :
2690 : : /* Is the page fully outside i_size? (truncate in progress) */
2691 : 0 : offset = i_size & (PAGE_CACHE_SIZE-1);
2692 [ # # ][ # # ]: 0 : if (page->index >= end_index+1 || !offset) {
2693 : : /*
2694 : : * The page may have dirty, unmapped buffers. For example,
2695 : : * they may have been added in ext3_writepage(). Make them
2696 : : * freeable here, so the page does not leak.
2697 : : */
2698 : : #if 0
2699 : : /* Not really sure about this - do we need this ? */
2700 : : if (page->mapping->a_ops->invalidatepage)
2701 : : page->mapping->a_ops->invalidatepage(page, offset);
2702 : : #endif
2703 : 0 : unlock_page(page);
2704 : 0 : return 0; /* don't care */
2705 : : }
2706 : :
2707 : : /*
2708 : : * The page straddles i_size. It must be zeroed out on each and every
2709 : : * writepage invocation because it may be mmapped. "A file is mapped
2710 : : * in multiples of the page size. For a file that is not a multiple of
2711 : : * the page size, the remaining memory is zeroed when mapped, and
2712 : : * writes to that region are not written out to the file."
2713 : : */
2714 : : zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2715 : : out:
2716 : 0 : ret = mpage_writepage(page, get_block, wbc);
2717 [ # # ]: 0 : if (ret == -EAGAIN)
2718 : 0 : ret = __block_write_full_page(inode, page, get_block, wbc,
2719 : : end_buffer_async_write);
2720 : 0 : return ret;
2721 : : }
2722 : : EXPORT_SYMBOL(nobh_writepage);
2723 : :
2724 : 0 : int nobh_truncate_page(struct address_space *mapping,
2725 : : loff_t from, get_block_t *get_block)
2726 : : {
2727 : 0 : pgoff_t index = from >> PAGE_CACHE_SHIFT;
2728 : 0 : unsigned offset = from & (PAGE_CACHE_SIZE-1);
2729 : : unsigned blocksize;
2730 : : sector_t iblock;
2731 : : unsigned length, pos;
2732 : 0 : struct inode *inode = mapping->host;
2733 : : struct page *page;
2734 : : struct buffer_head map_bh;
2735 : : int err;
2736 : :
2737 : 0 : blocksize = 1 << inode->i_blkbits;
2738 : 0 : length = offset & (blocksize - 1);
2739 : :
2740 : : /* Block boundary? Nothing to do */
2741 [ # # ]: 0 : if (!length)
2742 : : return 0;
2743 : :
2744 : 0 : length = blocksize - length;
2745 : 0 : iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2746 : :
2747 : : page = grab_cache_page(mapping, index);
2748 : : err = -ENOMEM;
2749 [ # # ]: 0 : if (!page)
2750 : : goto out;
2751 : :
2752 [ # # ]: 0 : if (page_has_buffers(page)) {
2753 : : has_buffers:
2754 : 0 : unlock_page(page);
2755 : 0 : page_cache_release(page);
2756 : 0 : return block_truncate_page(mapping, from, get_block);
2757 : : }
2758 : :
2759 : : /* Find the buffer that contains "offset" */
2760 : : pos = blocksize;
2761 [ # # ]: 0 : while (offset >= pos) {
2762 : 0 : iblock++;
2763 : 0 : pos += blocksize;
2764 : : }
2765 : :
2766 : 0 : map_bh.b_size = blocksize;
2767 : 0 : map_bh.b_state = 0;
2768 : 0 : err = get_block(inode, iblock, &map_bh, 0);
2769 [ # # ]: 0 : if (err)
2770 : : goto unlock;
2771 : : /* unmapped? It's a hole - nothing to do */
2772 [ # # ]: 0 : if (!buffer_mapped(&map_bh))
2773 : : goto unlock;
2774 : :
2775 : : /* Ok, it's mapped. Make sure it's up-to-date */
2776 [ # # ]: 0 : if (!PageUptodate(page)) {
2777 : 0 : err = mapping->a_ops->readpage(NULL, page);
2778 [ # # ]: 0 : if (err) {
2779 : 0 : page_cache_release(page);
2780 : 0 : goto out;
2781 : : }
2782 : : lock_page(page);
2783 [ # # ]: 0 : if (!PageUptodate(page)) {
2784 : : err = -EIO;
2785 : : goto unlock;
2786 : : }
2787 [ # # ]: 0 : if (page_has_buffers(page))
2788 : : goto has_buffers;
2789 : : }
2790 : : zero_user(page, offset, length);
2791 : 0 : set_page_dirty(page);
2792 : : err = 0;
2793 : :
2794 : : unlock:
2795 : 0 : unlock_page(page);
2796 : 0 : page_cache_release(page);
2797 : : out:
2798 : 0 : return err;
2799 : : }
2800 : : EXPORT_SYMBOL(nobh_truncate_page);
2801 : :
2802 : 0 : int block_truncate_page(struct address_space *mapping,
2803 : : loff_t from, get_block_t *get_block)
2804 : : {
2805 : 0 : pgoff_t index = from >> PAGE_CACHE_SHIFT;
2806 : 0 : unsigned offset = from & (PAGE_CACHE_SIZE-1);
2807 : : unsigned blocksize;
2808 : : sector_t iblock;
2809 : : unsigned length, pos;
2810 : 0 : struct inode *inode = mapping->host;
2811 : : struct page *page;
2812 : : struct buffer_head *bh;
2813 : : int err;
2814 : :
2815 : 0 : blocksize = 1 << inode->i_blkbits;
2816 : 0 : length = offset & (blocksize - 1);
2817 : :
2818 : : /* Block boundary? Nothing to do */
2819 [ # # ]: 0 : if (!length)
2820 : : return 0;
2821 : :
2822 : 0 : length = blocksize - length;
2823 : 0 : iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2824 : :
2825 : : page = grab_cache_page(mapping, index);
2826 : : err = -ENOMEM;
2827 [ # # ]: 0 : if (!page)
2828 : : goto out;
2829 : :
2830 [ # # ]: 0 : if (!page_has_buffers(page))
2831 : 0 : create_empty_buffers(page, blocksize, 0);
2832 : :
2833 : : /* Find the buffer that contains "offset" */
2834 [ # # ]: 0 : bh = page_buffers(page);
2835 : : pos = blocksize;
2836 [ # # ]: 0 : while (offset >= pos) {
2837 : 0 : bh = bh->b_this_page;
2838 : 0 : iblock++;
2839 : 0 : pos += blocksize;
2840 : : }
2841 : :
2842 : : err = 0;
2843 [ # # ]: 0 : if (!buffer_mapped(bh)) {
2844 [ # # ]: 0 : WARN_ON(bh->b_size != blocksize);
2845 : 0 : err = get_block(inode, iblock, bh, 0);
2846 [ # # ]: 0 : if (err)
2847 : : goto unlock;
2848 : : /* unmapped? It's a hole - nothing to do */
2849 [ # # ]: 0 : if (!buffer_mapped(bh))
2850 : : goto unlock;
2851 : : }
2852 : :
2853 : : /* Ok, it's mapped. Make sure it's up-to-date */
2854 [ # # ]: 0 : if (PageUptodate(page))
2855 : 0 : set_buffer_uptodate(bh);
2856 : :
2857 [ # # ][ # # ]: 0 : if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
[ # # ]
2858 : : err = -EIO;
2859 : 0 : ll_rw_block(READ, 1, &bh);
2860 : 0 : wait_on_buffer(bh);
2861 : : /* Uhhuh. Read error. Complain and punt. */
2862 [ # # ]: 0 : if (!buffer_uptodate(bh))
2863 : : goto unlock;
2864 : : }
2865 : :
2866 : : zero_user(page, offset, length);
2867 : 0 : mark_buffer_dirty(bh);
2868 : : err = 0;
2869 : :
2870 : : unlock:
2871 : 0 : unlock_page(page);
2872 : 0 : page_cache_release(page);
2873 : : out:
2874 : 0 : return err;
2875 : : }
2876 : : EXPORT_SYMBOL(block_truncate_page);
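/*
 * Illustration added in editing (not part of the kernel source): on a
 * shrinking truncate a filesystem typically zeroes the tail of the block that
 * now contains EOF with block_truncate_page() before freeing the blocks
 * beyond it.  myfs_get_block is the hypothetical callback from earlier.
 */
static int myfs_zero_eof_block(struct inode *inode, loff_t newsize)
{
	return block_truncate_page(inode->i_mapping, newsize, myfs_get_block);
}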
2877 : :
2878 : : /*
2879 : : * The generic ->writepage function for buffer-backed address_spaces;
2880 : : * this form passes in the end_io handler used to finish the IO.
2881 : : */
2882 : 0 : int block_write_full_page_endio(struct page *page, get_block_t *get_block,
2883 : : struct writeback_control *wbc, bh_end_io_t *handler)
2884 : : {
2885 : 26055 : struct inode * const inode = page->mapping->host;
2886 : : loff_t i_size = i_size_read(inode);
2887 : 26055 : const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2888 : : unsigned offset;
2889 : :
2890 : : /* Is the page fully inside i_size? */
2891 [ + - ]: 26055 : if (page->index < end_index)
2892 : 26055 : return __block_write_full_page(inode, page, get_block, wbc,
2893 : : handler);
2894 : :
2895 : : /* Is the page fully outside i_size? (truncate in progress) */
2896 : 0 : offset = i_size & (PAGE_CACHE_SIZE-1);
2897 [ # # ][ # # ]: 0 : if (page->index >= end_index+1 || !offset) {
2898 : : /*
2899 : : * The page may have dirty, unmapped buffers. For example,
2900 : : * they may have been added in ext3_writepage(). Make them
2901 : : * freeable here, so the page does not leak.
2902 : : */
2903 : 0 : do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
2904 : 0 : unlock_page(page);
2905 : 0 : return 0; /* don't care */
2906 : : }
2907 : :
2908 : : /*
2909 : : * The page straddles i_size. It must be zeroed out on each and every
2910 : : * writepage invocation because it may be mmapped. "A file is mapped
2911 : : * in multiples of the page size. For a file that is not a multiple of
2912 : : * the page size, the remaining memory is zeroed when mapped, and
2913 : : * writes to that region are not written out to the file."
2914 : : */
2915 : : zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2916 : 0 : return __block_write_full_page(inode, page, get_block, wbc, handler);
2917 : : }
2918 : : EXPORT_SYMBOL(block_write_full_page_endio);
2919 : :
2920 : : /*
2921 : : * The generic ->writepage function for buffer-backed address_spaces
2922 : : */
2923 : 0 : int block_write_full_page(struct page *page, get_block_t *get_block,
2924 : : struct writeback_control *wbc)
2925 : : {
2926 : 26055 : return block_write_full_page_endio(page, get_block, wbc,
2927 : : end_buffer_async_write);
2928 : : }
2929 : : EXPORT_SYMBOL(block_write_full_page);
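/*
 * Illustration added in editing (not part of the kernel source): the usual
 * ->writepage of a buffer-backed filesystem forwards straight to
 * block_write_full_page() with its (here hypothetical) get_block callback.
 */
static int myfs_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, myfs_get_block, wbc);
}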
2930 : :
2931 : 0 : sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2932 : : get_block_t *get_block)
2933 : : {
2934 : : struct buffer_head tmp;
2935 : 145402 : struct inode *inode = mapping->host;
2936 : 145402 : tmp.b_state = 0;
2937 : 145402 : tmp.b_blocknr = 0;
2938 : 145402 : tmp.b_size = 1 << inode->i_blkbits;
2939 : 145402 : get_block(inode, block, &tmp, 0);
2940 : 145402 : return tmp.b_blocknr;
2941 : : }
2942 : : EXPORT_SYMBOL(generic_block_bmap);
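/*
 * Illustration added in editing (not part of the kernel source): ->bmap
 * (used by the FIBMAP ioctl and swap-file activation) is normally just a
 * wrapper around generic_block_bmap() with the filesystem's get_block.
 */
static sector_t myfs_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping, block, myfs_get_block);
}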
2943 : :
2944 : 0 : static void end_bio_bh_io_sync(struct bio *bio, int err)
2945 : : {
2946 : 138168 : struct buffer_head *bh = bio->bi_private;
2947 : :
2948 [ - + ]: 138168 : if (err == -EOPNOTSUPP) {
2949 : 0 : set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
2950 : : }
2951 : :
2952 [ - + ]: 138168 : if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
2953 : 0 : set_bit(BH_Quiet, &bh->b_state);
2954 : :
2955 : 138168 : bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
2956 : 138168 : bio_put(bio);
2957 : 138168 : }
2958 : :
2959 : : /*
2960 : : * This allows us to do IO even on the odd last sectors
2961 : : * of a device, even if the bh block size is some multiple
2962 : : * of the physical sector size.
2963 : : *
2964 : : * We'll just truncate the bio to the size of the device,
2965 : : * and clear the end of the buffer head manually.
2966 : : *
2967 : : * Truly out-of-range accesses will turn into actual IO
2968 : : * errors; this only handles the "we need to be able to
2969 : : * do IO at the final sector" case.
2970 : : */
2971 : 0 : static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
2972 : : {
2973 : : sector_t maxsector;
2974 : : unsigned bytes;
2975 : :
2976 : 138168 : maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
2977 [ + - ]: 138167 : if (!maxsector)
2978 : : return;
2979 : :
2980 : : /*
2981 : : * If the *whole* IO is past the end of the device,
2982 : : * let it through, and the IO layer will turn it into
2983 : : * an EIO.
2984 : : */
2985 [ + - ]: 138167 : if (unlikely(bio->bi_sector >= maxsector))
2986 : : return;
2987 : :
2988 : 138167 : maxsector -= bio->bi_sector;
2989 : 138167 : bytes = bio->bi_size;
2990 [ - + ]: 138167 : if (likely((bytes >> 9) <= maxsector))
2991 : : return;
2992 : :
2993 : : /* Uhhuh. We've got a bh that straddles the device size! */
2994 : 0 : bytes = maxsector << 9;
2995 : :
2996 : : /* Truncate the bio.. */
2997 : 0 : bio->bi_size = bytes;
2998 : 0 : bio->bi_io_vec[0].bv_len = bytes;
2999 : :
3000 : : /* ..and clear the end of the buffer for reads */
3001 [ # # ]: 0 : if ((rw & RW_MASK) == READ) {
3002 : 0 : void *kaddr = kmap_atomic(bh->b_page);
3003 [ # # ]: 0 : memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
3004 : 0 : kunmap_atomic(kaddr);
3005 : 0 : flush_dcache_page(bh->b_page);
3006 : : }
3007 : : }
3008 : :
3009 : 0 : int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
3010 : : {
3011 : : struct bio *bio;
3012 : : int ret = 0;
3013 : :
3014 [ - + ]: 138168 : BUG_ON(!buffer_locked(bh));
3015 [ - + ]: 138168 : BUG_ON(!buffer_mapped(bh));
3016 [ - + ]: 138168 : BUG_ON(!bh->b_end_io);
3017 [ - + ]: 138168 : BUG_ON(buffer_delay(bh));
3018 [ - + ]: 138168 : BUG_ON(buffer_unwritten(bh));
3019 : :
3020 : : /*
3021 : : * Only clear out a write error when rewriting
3022 : : */
3023 [ + + ][ + - ]: 138168 : if (test_set_buffer_req(bh) && (rw & WRITE))
3024 : : clear_buffer_write_io_error(bh);
3025 : :
3026 : : /*
3027 : : * from here on down, it's all bio -- do the initial mapping,
3028 : : * submit_bio -> generic_make_request may further map this bio around
3029 : : */
3030 : : bio = bio_alloc(GFP_NOIO, 1);
3031 : :
3032 : 138167 : bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
3033 : 138167 : bio->bi_bdev = bh->b_bdev;
3034 : 138167 : bio->bi_io_vec[0].bv_page = bh->b_page;
3035 : 138167 : bio->bi_io_vec[0].bv_len = bh->b_size;
3036 : 138167 : bio->bi_io_vec[0].bv_offset = bh_offset(bh);
3037 : :
3038 : 138167 : bio->bi_vcnt = 1;
3039 : 138167 : bio->bi_size = bh->b_size;
3040 : :
3041 : 138167 : bio->bi_end_io = end_bio_bh_io_sync;
3042 : 138167 : bio->bi_private = bh;
3043 : 138167 : bio->bi_flags |= bio_flags;
3044 : :
3045 : : /* Take care of bh's that straddle the end of the device */
3046 : 138167 : guard_bh_eod(rw, bio, bh);
3047 : :
3048 [ + + ]: 138167 : if (buffer_meta(bh))
3049 : 19174 : rw |= REQ_META;
3050 [ + + ]: 138167 : if (buffer_prio(bh))
3051 : 19174 : rw |= REQ_PRIO;
3052 : :
3053 : 0 : bio_get(bio);
3054 : 138167 : submit_bio(rw, bio);
3055 : :
3056 [ - + ]: 138165 : if (bio_flagged(bio, BIO_EOPNOTSUPP))
3057 : : ret = -EOPNOTSUPP;
3058 : :
3059 : 138165 : bio_put(bio);
3060 : 138165 : return ret;
3061 : : }
3062 : : EXPORT_SYMBOL_GPL(_submit_bh);
3063 : :
3064 : 0 : int submit_bh(int rw, struct buffer_head *bh)
3065 : : {
3066 : 138168 : return _submit_bh(rw, bh, 0);
3067 : : }
3068 : : EXPORT_SYMBOL(submit_bh);
3069 : :
3070 : : /**
3071 : : * ll_rw_block: low-level access to block devices (DEPRECATED)
3072 : : * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
3073 : : * @nr: number of &struct buffer_heads in the array
3074 : : * @bhs: array of pointers to &struct buffer_head
3075 : : *
3076 : : * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
3077 : : * requests an I/O operation on them, either a %READ or a %WRITE. The third
3078 : : * %READA option is described in the documentation for generic_make_request()
3079 : : * which ll_rw_block() calls.
3080 : : *
3081 : : * This function drops any buffer that it cannot get a lock on (with the
3082 : : * BH_Lock state bit), any buffer that appears to be clean when doing a write
3083 : : * request, and any buffer that appears to be up-to-date when doing a read
3084 : : * request. Further it marks as clean buffers that are processed for
3085 : : * writing (the buffer cache won't assume that they are actually clean
3086 : : * until the buffer gets unlocked).
3087 : : *
3088 : : * ll_rw_block sets b_end_io to a simple completion handler that marks
3089 : : * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
3090 : : * any waiters.
3091 : : *
3092 : : * All of the buffers must be for the same device, and must also be a
3093 : : * multiple of the current approved size for the device.
3094 : : */
3095 : 0 : void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
3096 : : {
3097 : : int i;
3098 : :
3099 [ + + ]: 2159402 : for (i = 0; i < nr; i++) {
3100 : 1079704 : struct buffer_head *bh = bhs[i];
3101 : :
3102 [ + + ]: 1079707 : if (!trylock_buffer(bh))
3103 : 208 : continue;
3104 [ - + ]: 1079499 : if (rw == WRITE) {
3105 [ # # ]: 0 : if (test_clear_buffer_dirty(bh)) {
3106 : 0 : bh->b_end_io = end_buffer_write_sync;
3107 : : get_bh(bh);
3108 : : submit_bh(WRITE, bh);
3109 : 0 : continue;
3110 : : }
3111 : : } else {
3112 [ + + ]: 1079499 : if (!buffer_uptodate(bh)) {
3113 : 5076 : bh->b_end_io = end_buffer_read_sync;
3114 : : get_bh(bh);
3115 : : submit_bh(rw, bh);
3116 : 5076 : continue;
3117 : : }
3118 : : }
3119 : 1074423 : unlock_buffer(bh);
3120 : : }
3121 : 1079698 : }
3122 : : EXPORT_SYMBOL(ll_rw_block);
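/*
 * Illustration added in editing (not part of the kernel source): the classic
 * (and now discouraged) ll_rw_block() read pattern - start the read, wait,
 * then re-check buffer_uptodate(), because ll_rw_block() silently skips any
 * buffer it could not lock.  The helper and its arguments are hypothetical.
 */
static struct buffer_head *myfs_read_block(struct super_block *sb,
					   sector_t blocknr)
{
	struct buffer_head *bh = sb_getblk(sb, blocknr);

	if (!bh)
		return NULL;
	if (!buffer_uptodate(bh)) {
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {	/* I/O error */
			brelse(bh);
			return NULL;
		}
	}
	return bh;
}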
3123 : :
3124 : 0 : void write_dirty_buffer(struct buffer_head *bh, int rw)
3125 : : {
3126 : : lock_buffer(bh);
3127 [ - + ]: 610 : if (!test_clear_buffer_dirty(bh)) {
3128 : 0 : unlock_buffer(bh);
3129 : 0 : return;
3130 : : }
3131 : 610 : bh->b_end_io = end_buffer_write_sync;
3132 : : get_bh(bh);
3133 : : submit_bh(rw, bh);
3134 : : }
3135 : : EXPORT_SYMBOL(write_dirty_buffer);
3136 : :
3137 : : /*
3138 : : * For a data-integrity writeout, we need to wait upon any in-progress I/O
3139 : : * and then start new I/O and then wait upon it. The caller must have a ref on
3140 : : * the buffer_head.
3141 : : */
3142 : 0 : int __sync_dirty_buffer(struct buffer_head *bh, int rw)
3143 : : {
3144 : : int ret = 0;
3145 : :
3146 [ # # ]: 0 : WARN_ON(atomic_read(&bh->b_count) < 1);
3147 : : lock_buffer(bh);
3148 [ # # ]: 0 : if (test_clear_buffer_dirty(bh)) {
3149 : : get_bh(bh);
3150 : 0 : bh->b_end_io = end_buffer_write_sync;
3151 : : ret = submit_bh(rw, bh);
3152 : : wait_on_buffer(bh);
3153 [ # # ][ # # ]: 0 : if (!ret && !buffer_uptodate(bh))
3154 : : ret = -EIO;
3155 : : } else {
3156 : 0 : unlock_buffer(bh);
3157 : : }
3158 : 0 : return ret;
3159 : : }
3160 : : EXPORT_SYMBOL(__sync_dirty_buffer);
3161 : :
3162 : 0 : int sync_dirty_buffer(struct buffer_head *bh)
3163 : : {
3164 : 0 : return __sync_dirty_buffer(bh, WRITE_SYNC);
3165 : : }
3166 : : EXPORT_SYMBOL(sync_dirty_buffer);
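A minimal sketch of the common metadata-commit pattern built on sync_dirty_buffer(): dirty the buffer, then write it out and wait. example_commit_super is a hypothetical caller; it assumes a reference on bh is already held, which is the precondition __sync_dirty_buffer() warns about above.

static int example_commit_super(struct buffer_head *bh)
{
	mark_buffer_dirty(bh);
	return sync_dirty_buffer(bh);	/* waits; returns -EIO if the write failed */
}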
3167 : :
3168 : : /*
3169 : : * try_to_free_buffers() checks if all the buffers on this particular page
3170 : : * are unused, and releases them if so.
3171 : : *
3172 : : * Exclusion against try_to_free_buffers may be obtained by either
3173 : : * locking the page or by holding its mapping's private_lock.
3174 : : *
3175 : : * If the page is dirty but all the buffers are clean then we need to
3176 : : * be sure to mark the page clean as well. This is because the page
3177 : : * may be against a block device, and a later reattachment of buffers
3178 : : * to a dirty page will set *all* buffers dirty, which would corrupt
3179 : : * filesystem data on the same device.
3180 : : *
3181 : : * The same applies to regular filesystem pages: if all the buffers are
3182 : : * clean then we set the page clean and proceed. To do that, we require
3183 : : * total exclusion from __set_page_dirty_buffers(). That is obtained with
3184 : : * private_lock.
3185 : : *
3186 : : * try_to_free_buffers() is non-blocking.
3187 : : */
3188 : : static inline int buffer_busy(struct buffer_head *bh)
3189 : : {
3190 : 3858036 : return atomic_read(&bh->b_count) |
3191 : 1929018 : (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
3192 : : }
3193 : :
3194 : : static int
3195 : 0 : drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
3196 : : {
3197 [ - + ]: 1929030 : struct buffer_head *head = page_buffers(page);
3198 : : struct buffer_head *bh;
3199 : :
3200 : : bh = head;
3201 : : do {
3202 [ - + ][ # # ]: 1929031 : if (buffer_write_io_error(bh) && page->mapping)
3203 : 0 : set_bit(AS_EIO, &page->mapping->flags);
3204 [ + + ]: 1929018 : if (buffer_busy(bh))
3205 : : goto failed;
3206 : 1913315 : bh = bh->b_this_page;
3207 [ + + ]: 1913315 : } while (bh != head);
3208 : :
3209 : : do {
3210 : 1913317 : struct buffer_head *next = bh->b_this_page;
3211 : :
3212 [ - + ]: 1913317 : if (bh->b_assoc_map)
3213 : 0 : __remove_assoc_queue(bh);
3214 : : bh = next;
3215 [ + + ]: 1913321 : } while (bh != head);
3216 : 1913318 : *buffers_to_free = head;
3217 : 1913318 : __clear_page_buffers(page);
3218 : 1913325 : return 1;
3219 : : failed:
3220 : : return 0;
3221 : : }
3222 : :
3223 : 0 : int try_to_free_buffers(struct page *page)
3224 : : {
3225 : 1928973 : struct address_space * const mapping = page->mapping;
3226 : 1928973 : struct buffer_head *buffers_to_free = NULL;
3227 : : int ret = 0;
3228 : :
3229 [ - + ]: 1928973 : BUG_ON(!PageLocked(page));
3230 [ + ]: 1928973 : if (PageWriteback(page))
3231 : : return 0;
3232 : :
3233 [ + + ]: 1928996 : if (mapping == NULL) { /* can this still happen? */
3234 : 79 : ret = drop_buffers(page, &buffers_to_free);
3235 : 79 : goto out;
3236 : : }
3237 : :
3238 : : spin_lock(&mapping->private_lock);
3239 : 1928961 : ret = drop_buffers(page, &buffers_to_free);
3240 : :
3241 : : /*
3242 : : * If the filesystem writes its buffers by hand (e.g. ext3)
3243 : : * then we can have clean buffers against a dirty page. We
3244 : : * clean the page here; otherwise the VM will never notice
3245 : : * that the filesystem did any IO at all.
3246 : : *
3247 : : * Also, during truncate, discard_buffer will have marked all
3248 : : * the page's buffers clean. We discover that here and clean
3249 : : * the page also.
3250 : : *
3251 : : * private_lock must be held over this entire operation in order
3252 : : * to synchronise against __set_page_dirty_buffers and prevent the
3253 : : * dirty bit from being lost.
3254 : : */
3255 [ + + ]: 1928933 : if (ret)
3256 : 1913230 : cancel_dirty_page(page, PAGE_CACHE_SIZE);
3257 : : spin_unlock(&mapping->private_lock);
3258 : : out:
3259 [ + + ]: 3858012 : if (buffers_to_free) {
3260 : : struct buffer_head *bh = buffers_to_free;
3261 : :
3262 : : do {
3263 : 1913338 : struct buffer_head *next = bh->b_this_page;
3264 : 1913338 : free_buffer_head(bh);
3265 : : bh = next;
3266 [ + + ]: 1913332 : } while (bh != buffers_to_free);
3267 : : }
3268 : 1929033 : return ret;
3269 : : }
3270 : : EXPORT_SYMBOL(try_to_free_buffers);
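A minimal sketch of how an address_space ->releasepage() method with no private state of its own might defer to try_to_free_buffers(); example_releasepage is hypothetical, and the gfp argument is deliberately unused, matching the simple block-based case.

static int example_releasepage(struct page *page, gfp_t gfp)
{
	if (!page_has_buffers(page))
		return 1;		/* nothing attached, the page may be released */
	return try_to_free_buffers(page);
}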
3271 : :
3272 : : /*
3273 : : * There are no bdflush tunables left. But distributions are
3274 : : * still running obsolete flush daemons, so we terminate them here.
3275 : : *
3276 : : * Use of bdflush() is deprecated and will be removed in a future kernel.
3277 : : * The `flush-X' kernel threads fully replace bdflush daemons and this call.
3278 : : */
3279 : 0 : SYSCALL_DEFINE2(bdflush, int, func, long, data)
3280 : : {
3281 : : static int msg_count;
3282 : :
3283 [ + - ]: 1 : if (!capable(CAP_SYS_ADMIN))
3284 : : return -EPERM;
3285 : :
3286 [ + - ]: 1 : if (msg_count < 5) {
3287 : 1 : msg_count++;
3288 : 1 : printk(KERN_INFO
3289 : : "warning: process `%s' used the obsolete bdflush"
3290 : 1 : " system call\n", current->comm);
3291 : 1 : printk(KERN_INFO "Fix your initscripts?\n");
3292 : : }
3293 : :
3294 [ - + ]: 1 : if (func == 1)
3295 : 0 : do_exit(0);
3296 : : return 0;
3297 : : }
3298 : :
3299 : : /*
3300 : : * Buffer-head allocation
3301 : : */
3302 : : static struct kmem_cache *bh_cachep __read_mostly;
3303 : :
3304 : : /*
3305 : : * Once the number of bh's in the machine exceeds this level, we start
3306 : : * stripping them in writeback.
3307 : : */
3308 : : static unsigned long max_buffer_heads;
3309 : :
3310 : : int buffer_heads_over_limit;
3311 : :
3312 : : struct bh_accounting {
3313 : : int nr; /* Number of live bh's */
3314 : : int ratelimit; /* Limit cacheline bouncing */
3315 : : };
3316 : :
3317 : : static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
3318 : :
3319 : 0 : static void recalc_bh_state(void)
3320 : : {
3321 : : int i;
3322 : : int tot = 0;
3323 : :
3324 [ + + ]: 3952835 : if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
3325 : 3952835 : return;
3326 : 1930 : __this_cpu_write(bh_accounting.ratelimit, 0);
3327 [ + + ]: 3956695 : for_each_online_cpu(i)
3328 : 1930 : tot += per_cpu(bh_accounting, i).nr;
3329 : 965 : buffer_heads_over_limit = (tot > max_buffer_heads);
3330 : : }
3331 : :
3332 : 0 : struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3333 : : {
3334 : 1968873 : struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
3335 [ + + ]: 1969165 : if (ret) {
3336 : 1969047 : INIT_LIST_HEAD(&ret->b_assoc_buffers);
3337 : 1969047 : preempt_disable();
3338 : 3937996 : __this_cpu_inc(bh_accounting.nr);
3339 : 1968998 : recalc_bh_state();
3340 : 1969099 : preempt_enable();
3341 : : }
3342 : 399 : return ret;
3343 : : }
3344 : : EXPORT_SYMBOL(alloc_buffer_head);
3345 : :
3346 : 0 : void free_buffer_head(struct buffer_head *bh)
3347 : : {
3348 [ - + ]: 1983743 : BUG_ON(!list_empty(&bh->b_assoc_buffers));
3349 : 1983743 : kmem_cache_free(bh_cachep, bh);
3350 : 1983738 : preempt_disable();
3351 : 3967478 : __this_cpu_dec(bh_accounting.nr);
3352 : 1983739 : recalc_bh_state();
3353 : 1983738 : preempt_enable();
3354 : 1983736 : }
3355 : : EXPORT_SYMBOL(free_buffer_head);
3356 : :
3357 : 0 : static void buffer_exit_cpu(int cpu)
3358 : : {
3359 : : int i;
3360 : 0 : struct bh_lru *b = &per_cpu(bh_lrus, cpu);
3361 : :
3362 [ # # ]: 0 : for (i = 0; i < BH_LRU_SIZE; i++) {
3363 : 0 : brelse(b->bhs[i]);
3364 : 0 : b->bhs[i] = NULL;
3365 : : }
3366 : 0 : this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
3367 : 0 : per_cpu(bh_accounting, cpu).nr = 0;
3368 : 0 : }
3369 : :
3370 : 0 : static int buffer_cpu_notify(struct notifier_block *self,
3371 : : unsigned long action, void *hcpu)
3372 : : {
3373 [ # # ]: 0 : if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
3374 : 0 : buffer_exit_cpu((unsigned long)hcpu);
3375 : 0 : return NOTIFY_OK;
3376 : : }
3377 : :
3378 : : /**
3379 : : * bh_uptodate_or_lock - Test whether the buffer is uptodate
3380 : : * @bh: struct buffer_head
3381 : : *
3382 : : * Returns 1 if the buffer is up-to-date; otherwise returns 0
3383 : : * with the buffer locked.
3384 : : */
3385 : 0 : int bh_uptodate_or_lock(struct buffer_head *bh)
3386 : : {
3387 [ + + ]: 347271 : if (!buffer_uptodate(bh)) {
3388 : : lock_buffer(bh);
3389 [ - + ]: 8 : if (!buffer_uptodate(bh))
3390 : : return 0;
3391 : 0 : unlock_buffer(bh);
3392 : : }
3393 : : return 1;
3394 : : }
3395 : : EXPORT_SYMBOL(bh_uptodate_or_lock);
3396 : :
3397 : : /**
3398 : : * bh_submit_read - Submit a locked buffer for reading
3399 : : * @bh: struct buffer_head
3400 : : *
3401 : : * Returns zero on success and -EIO on error.
3402 : : */
3403 : 0 : int bh_submit_read(struct buffer_head *bh)
3404 : : {
3405 [ - + ]: 8 : BUG_ON(!buffer_locked(bh));
3406 : :
3407 [ - + ]: 8 : if (buffer_uptodate(bh)) {
3408 : 0 : unlock_buffer(bh);
3409 : 0 : return 0;
3410 : : }
3411 : :
3412 : : get_bh(bh);
3413 : 8 : bh->b_end_io = end_buffer_read_sync;
3414 : : submit_bh(READ, bh);
3415 : : wait_on_buffer(bh);
3416 [ - + ]: 16 : if (buffer_uptodate(bh))
3417 : : return 0;
3418 : 0 : return -EIO;
3419 : : }
3420 : : EXPORT_SYMBOL(bh_submit_read);
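bh_uptodate_or_lock() and bh_submit_read() are designed to pair up: the first returns with the buffer locked only when a read is actually needed, and the second performs that read. A minimal sketch, using the hypothetical helper example_read_if_needed():

static int example_read_if_needed(struct buffer_head *bh)
{
	if (bh_uptodate_or_lock(bh))
		return 0;		/* already up to date, buffer not locked */
	return bh_submit_read(bh);	/* buffer is locked: submit READ and wait */
}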
3421 : :
3422 : 0 : void __init buffer_init(void)
3423 : : {
3424 : : unsigned long nrpages;
3425 : :
3426 : 0 : bh_cachep = kmem_cache_create("buffer_head",
3427 : : sizeof(struct buffer_head), 0,
3428 : : (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
3429 : : SLAB_MEM_SPREAD),
3430 : : NULL);
3431 : :
3432 : : /*
3433 : : * Limit the bh occupancy to 10% of ZONE_NORMAL
3434 : : */
3435 : 0 : nrpages = (nr_free_buffer_pages() * 10) / 100;
3436 : 0 : max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
3437 : 0 : hotcpu_notifier(buffer_cpu_notify, 0);
3438 : 0 : }
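As a rough worked example of the limit set up above (figures are illustrative, not measured): with 4 KiB pages and a struct buffer_head of roughly 100 bytes, each page holds about 40 buffer heads. If nr_free_buffer_pages() reports 1,000,000 pages, the 10% cap gives nrpages = 100,000 and max_buffer_heads ≈ 100,000 * (4096 / ~100) ≈ 4 million, beyond which buffer_heads_over_limit is set and writeback starts stripping buffer heads from clean pages.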
|