LCOV - coverage.info - mm/filemap.c

LCOV - code coverage report

Current view:	top level - mm - filemap.c (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	515	756	68.1 %
Date:	2014-02-18	Functions:	54	65	83.1 %
		Branches:	301	534	56.4 %

           Branch data     Line data    Source code

       1                 :            : /*
       2                 :            :  *      linux/mm/filemap.c
       3                 :            :  *
       4                 :            :  * Copyright (C) 1994-1999  Linus Torvalds
       5                 :            :  */
       6                 :            : 
       7                 :            : /*
       8                 :            :  * This file handles the generic file mmap semantics used by
       9                 :            :  * most "normal" filesystems (but you don't /have/ to use this:
      10                 :            :  * the NFS filesystem used to do this differently, for example)
      11                 :            :  */
      12                 :            : #include <linux/export.h>
      13                 :            : #include <linux/compiler.h>
      14                 :            : #include <linux/fs.h>
      15                 :            : #include <linux/uaccess.h>
      16                 :            : #include <linux/aio.h>
      17                 :            : #include <linux/capability.h>
      18                 :            : #include <linux/kernel_stat.h>
      19                 :            : #include <linux/gfp.h>
      20                 :            : #include <linux/mm.h>
      21                 :            : #include <linux/swap.h>
      22                 :            : #include <linux/mman.h>
      23                 :            : #include <linux/pagemap.h>
      24                 :            : #include <linux/file.h>
      25                 :            : #include <linux/uio.h>
      26                 :            : #include <linux/hash.h>
      27                 :            : #include <linux/writeback.h>
      28                 :            : #include <linux/backing-dev.h>
      29                 :            : #include <linux/pagevec.h>
      30                 :            : #include <linux/blkdev.h>
      31                 :            : #include <linux/security.h>
      32                 :            : #include <linux/cpuset.h>
      33                 :            : #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
      34                 :            : #include <linux/memcontrol.h>
      35                 :            : #include <linux/cleancache.h>
      36                 :            : #include "internal.h"
      37                 :            : 
      38                 :            : #define CREATE_TRACE_POINTS
      39                 :            : #include <trace/events/filemap.h>
      40                 :            : 
      41                 :            : /*
      42                 :            :  * FIXME: remove all knowledge of the buffer layer from the core VM
      43                 :            :  */
      44                 :            : #include <linux/buffer_head.h> /* for try_to_free_buffers */
      45                 :            : 
      46                 :            : #include <asm/mman.h>
      47                 :            : 
      48                 :            : /*
      49                 :            :  * Shared mappings implemented 30.11.1994. It's not fully working yet,
      50                 :            :  * though.
      51                 :            :  *
      52                 :            :  * Shared mappings now work. 15.8.1995  Bruno.
      53                 :            :  *
      54                 :            :  * finished 'unifying' the page and buffer cache and SMP-threaded the
      55                 :            :  * page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com>
      56                 :            :  *
      57                 :            :  * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de>
      58                 :            :  */
      59                 :            : 
      60                 :            : /*
      61                 :            :  * Lock ordering:
      62                 :            :  *
      63                 :            :  *  ->i_mmap_mutex           (truncate_pagecache)
      64                 :            :  *    ->private_lock         (__free_pte->__set_page_dirty_buffers)
      65                 :            :  *      ->swap_lock          (exclusive_swap_page, others)
      66                 :            :  *        ->mapping->tree_lock
      67                 :            :  *
      68                 :            :  *  ->i_mutex
      69                 :            :  *    ->i_mmap_mutex         (truncate->unmap_mapping_range)
      70                 :            :  *
      71                 :            :  *  ->mmap_sem
      72                 :            :  *    ->i_mmap_mutex
      73                 :            :  *      ->page_table_lock or pte_lock        (various, mainly in memory.c)
      74                 :            :  *        ->mapping->tree_lock    (arch-dependent flush_dcache_mmap_lock)
      75                 :            :  *
      76                 :            :  *  ->mmap_sem
      77                 :            :  *    ->lock_page            (access_process_vm)
      78                 :            :  *
      79                 :            :  *  ->i_mutex                        (generic_file_buffered_write)
      80                 :            :  *    ->mmap_sem             (fault_in_pages_readable->do_page_fault)
      81                 :            :  *
      82                 :            :  *  bdi->wb.list_lock
      83                 :            :  *    sb_lock                   (fs/fs-writeback.c)
      84                 :            :  *    ->mapping->tree_lock        (__sync_single_inode)
      85                 :            :  *
      86                 :            :  *  ->i_mmap_mutex
      87                 :            :  *    ->anon_vma.lock                (vma_adjust)
      88                 :            :  *
      89                 :            :  *  ->anon_vma.lock
      90                 :            :  *    ->page_table_lock or pte_lock  (anon_vma_prepare and various)
      91                 :            :  *
      92                 :            :  *  ->page_table_lock or pte_lock
      93                 :            :  *    ->swap_lock            (try_to_unmap_one)
      94                 :            :  *    ->private_lock         (try_to_unmap_one)
      95                 :            :  *    ->tree_lock            (try_to_unmap_one)
      96                 :            :  *    ->zone.lru_lock                (follow_page->mark_page_accessed)
      97                 :            :  *    ->zone.lru_lock                (check_pte_range->isolate_lru_page)
      98                 :            :  *    ->private_lock         (page_remove_rmap->set_page_dirty)
      99                 :            :  *    ->tree_lock            (page_remove_rmap->set_page_dirty)
     100                 :            :  *    bdi.wb->list_lock              (page_remove_rmap->set_page_dirty)
     101                 :            :  *    ->inode->i_lock             (page_remove_rmap->set_page_dirty)
     102                 :            :  *    bdi.wb->list_lock              (zap_pte_range->set_page_dirty)
     103                 :            :  *    ->inode->i_lock             (zap_pte_range->set_page_dirty)
     104                 :            :  *    ->private_lock         (zap_pte_range->__set_page_dirty_buffers)
     105                 :            :  *
     106                 :            :  * ->i_mmap_mutex
     107                 :            :  *   ->tasklist_lock            (memory_failure, collect_procs_ao)
     108                 :            :  */
     109                 :            : 
     110                 :            : /*
     111                 :            :  * Delete a page from the page cache and free it. Caller has to make
     112                 :            :  * sure the page is locked and that nobody else uses it - or that usage
     113                 :            :  * is safe.  The caller must hold the mapping's tree_lock.
     114                 :            :  */
     115                 :          0 : void __delete_from_page_cache(struct page *page)
     116                 :            : {
     117                 :    2187170 :         struct address_space *mapping = page->mapping;
     118                 :            : 
     119                 :            :         trace_mm_filemap_delete_from_page_cache(page);
     120                 :            :         /*
     121                 :            :          * if we're uptodate, flush out into the cleancache, otherwise
     122                 :            :          * invalidate any existing cleancache entries.  We can't leave
     123                 :            :          * stale data around in the cleancache once our page is gone
     124                 :            :          */
     125         [ +  + ]:    2187182 :         if (PageUptodate(page) && PageMappedToDisk(page))
     126                 :            :                 cleancache_put_page(page);
     127                 :            :         else
     128                 :            :                 cleancache_invalidate_page(mapping, page);
     129                 :            : 
     130                 :    2187182 :         radix_tree_delete(&mapping->page_tree, page->index);
     131                 :    2187155 :         page->mapping = NULL;
     132                 :            :         /* Leave page->index set: truncation lookup relies upon it */
     133                 :    2187155 :         mapping->nrpages--;
     134                 :    2187155 :         __dec_zone_page_state(page, NR_FILE_PAGES);
     135         [ +  + ]:    2187163 :         if (PageSwapBacked(page))
     136                 :      11323 :                 __dec_zone_page_state(page, NR_SHMEM);
     137         [ -  + ]:    2187163 :         BUG_ON(page_mapped(page));
     138                 :            : 
     139                 :            :         /*
     140                 :            :          * Some filesystems seem to re-dirty the page even after
     141                 :            :          * the VM has canceled the dirty bit (eg ext3 journaling).
     142                 :            :          *
     143                 :            :          * Fix it up by doing a final dirty accounting check after
     144                 :            :          * having removed the page entirely.
     145                 :            :          */
     146 [ -  + ][ #  # ]:    2187163 :         if (PageDirty(page) && mapping_cap_account_dirty(mapping)) {
     147                 :          0 :                 dec_zone_page_state(page, NR_FILE_DIRTY);
     148                 :          0 :                 dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
     149                 :            :         }
     150                 :    2187163 : }
     151                 :            : 
     152                 :            : /**
     153                 :            :  * delete_from_page_cache - delete page from page cache
     154                 :            :  * @page: the page which the kernel is trying to remove from page cache
     155                 :            :  *
     156                 :            :  * This must be called only on pages that have been verified to be in the page
     157                 :            :  * cache and locked.  It will never put the page into the free list, because
     158                 :            :  * the caller holds a reference on the page.
     159                 :            :  */
     160                 :          0 : void delete_from_page_cache(struct page *page)
     161                 :            : {
     162                 :    1860528 :         struct address_space *mapping = page->mapping;
     163                 :            :         void (*freepage)(struct page *);
     164                 :            : 
     165         [ -  + ]:    1860528 :         BUG_ON(!PageLocked(page));
     166                 :            : 
     167                 :    1860528 :         freepage = mapping->a_ops->freepage;
     168                 :            :         spin_lock_irq(&mapping->tree_lock);
     169                 :    1860531 :         __delete_from_page_cache(page);
     170                 :            :         spin_unlock_irq(&mapping->tree_lock);
     171                 :            :         mem_cgroup_uncharge_cache_page(page);
     172                 :            : 
     173         [ -  + ]:    1860541 :         if (freepage)
     174                 :          0 :                 freepage(page);
     175                 :    1860541 :         page_cache_release(page);
     176                 :    1860521 : }
     177                 :            : EXPORT_SYMBOL(delete_from_page_cache);
     178                 :            : 
     179                 :          0 : static int sleep_on_page(void *word)
     180                 :            : {
     181                 :     193598 :         io_schedule();
     182                 :     160007 :         return 0;
     183                 :            : }
     184                 :            : 
     185                 :          0 : static int sleep_on_page_killable(void *word)
     186                 :            : {
     187                 :            :         sleep_on_page(word);
     188            [ + ]:      33681 :         return fatal_signal_pending(current) ? -EINTR : 0;
     189                 :            : }
     190                 :            : 
     191                 :          0 : static int filemap_check_errors(struct address_space *mapping)
     192                 :            : {
     193                 :            :         int ret = 0;
     194                 :            :         /* Check for outstanding write errors */
     195         [ -  + ]:    2072457 :         if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
     196                 :            :                 ret = -ENOSPC;
     197         [ -  + ]:    2072261 :         if (test_and_clear_bit(AS_EIO, &mapping->flags))
     198                 :            :                 ret = -EIO;
     199                 :    2072091 :         return ret;
     200                 :            : }
     201                 :            : 
     202                 :            : /**
     203                 :            :  * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
     204                 :            :  * @mapping:    address space structure to write
     205                 :            :  * @start:      offset in bytes where the range starts
     206                 :            :  * @end:        offset in bytes where the range ends (inclusive)
     207                 :            :  * @sync_mode:  enable synchronous operation
     208                 :            :  *
     209                 :            :  * Start writeback against all of a mapping's dirty pages that lie
     210                 :            :  * within the byte offsets <start, end> inclusive.
     211                 :            :  *
     212                 :            :  * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
     213                 :            :  * opposed to a regular memory cleansing writeback.  The difference between
     214                 :            :  * these two operations is that if a dirty page/buffer is encountered, it must
     215                 :            :  * be waited upon, and not just skipped over.
     216                 :            :  */
     217                 :          0 : int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
     218                 :            :                                 loff_t end, int sync_mode)
     219                 :            : {
     220                 :            :         int ret;
     221                 :      81484 :         struct writeback_control wbc = {
     222                 :            :                 .sync_mode = sync_mode,
     223                 :            :                 .nr_to_write = LONG_MAX,
     224                 :            :                 .range_start = start,
     225                 :            :                 .range_end = end,
     226                 :            :         };
     227                 :            : 
     228            [ + ]:      81484 :         if (!mapping_cap_writeback_dirty(mapping))
     229                 :            :                 return 0;
     230                 :            : 
     231                 :      81491 :         ret = do_writepages(mapping, &wbc);
     232                 :      81438 :         return ret;
     233                 :            : }
     234                 :            : 
     235                 :            : static inline int __filemap_fdatawrite(struct address_space *mapping,
     236                 :            :         int sync_mode)
     237                 :            : {
     238                 :       8274 :         return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
     239                 :            : }
     240                 :            : 
     241                 :          0 : int filemap_fdatawrite(struct address_space *mapping)
     242                 :            : {
     243                 :       2956 :         return __filemap_fdatawrite(mapping, WB_SYNC_ALL);
     244                 :            : }
     245                 :            : EXPORT_SYMBOL(filemap_fdatawrite);
     246                 :            : 
     247                 :          0 : int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
     248                 :            :                                 loff_t end)
     249                 :            : {
     250                 :        302 :         return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
     251                 :            : }
     252                 :            : EXPORT_SYMBOL(filemap_fdatawrite_range);
     253                 :            : 
     254                 :            : /**
     255                 :            :  * filemap_flush - mostly a non-blocking flush
     256                 :            :  * @mapping:    target address_space
     257                 :            :  *
     258                 :            :  * This is a mostly non-blocking flush.  Not suitable for data-integrity
     259                 :            :  * purposes - I/O may not be started against all dirty pages.
     260                 :            :  */
     261                 :          0 : int filemap_flush(struct address_space *mapping)
     262                 :            : {
     263                 :       5314 :         return __filemap_fdatawrite(mapping, WB_SYNC_NONE);
     264                 :            : }
     265                 :            : EXPORT_SYMBOL(filemap_flush);
     266                 :            : 
     267                 :            : /**
     268                 :            :  * filemap_fdatawait_range - wait for writeback to complete
     269                 :            :  * @mapping:            address space structure to wait for
     270                 :            :  * @start_byte:         offset in bytes where the range starts
     271                 :            :  * @end_byte:           offset in bytes where the range ends (inclusive)
     272                 :            :  *
     273                 :            :  * Walk the list of under-writeback pages of the given address space
     274                 :            :  * in the given range and wait for all of them.
     275                 :            :  */
     276                 :          0 : int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
     277                 :            :                             loff_t end_byte)
     278                 :            : {
     279                 :    1954992 :         pgoff_t index = start_byte >> PAGE_CACHE_SHIFT;
     280                 :    1954992 :         pgoff_t end = end_byte >> PAGE_CACHE_SHIFT;
     281                 :            :         struct pagevec pvec;
     282                 :            :         int nr_pages;
     283                 :            :         int ret2, ret = 0;
     284                 :            : 
     285         [ +  + ]:    1954992 :         if (end_byte < start_byte)
     286                 :            :                 goto out;
     287                 :            : 
     288                 :            :         pagevec_init(&pvec, 0);
     289   [ +  +  +  + ]:    4007111 :         while ((index <= end) &&
     290                 :    1996850 :                         (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
     291                 :            :                         PAGECACHE_TAG_WRITEBACK,
     292                 :    1996850 :                         min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
     293                 :            :                 unsigned i;
     294                 :            : 
     295         [ +  + ]:     519609 :                 for (i = 0; i < nr_pages; i++) {
     296                 :     461871 :                         struct page *page = pvec.pages[i];
     297                 :            : 
     298                 :            :                         /* until radix tree lookup accepts end_index */
     299         [ +  + ]:     461871 :                         if (page->index > end)
     300                 :       1441 :                                 continue;
     301                 :            : 
     302                 :            :                         wait_on_page_writeback(page);
     303         [ -  + ]:     460440 :                         if (TestClearPageError(page))
     304                 :            :                                 ret = -EIO;
     305                 :            :                 }
     306                 :            :                 pagevec_release(&pvec);
     307                 :      57678 :                 cond_resched();
     308                 :            :         }
     309                 :            : out:
     310                 :    1952734 :         ret2 = filemap_check_errors(mapping);
     311            [ + ]:    1954571 :         if (!ret)
     312                 :            :                 ret = ret2;
     313                 :            : 
     314                 :    1954571 :         return ret;
     315                 :            : }
     316                 :            : EXPORT_SYMBOL(filemap_fdatawait_range);
     317                 :            : 
     318                 :            : /**
     319                 :            :  * filemap_fdatawait - wait for all under-writeback pages to complete
     320                 :            :  * @mapping: address space structure to wait for
     321                 :            :  *
     322                 :            :  * Walk the list of under-writeback pages of the given address space
     323                 :            :  * and wait for all of them.
     324                 :            :  */
     325                 :          0 : int filemap_fdatawait(struct address_space *mapping)
     326                 :            : {
     327                 :    1884886 :         loff_t i_size = i_size_read(mapping->host);
     328                 :            : 
     329         [ +  + ]:    1884836 :         if (i_size == 0)
     330                 :            :                 return 0;
     331                 :            : 
     332                 :    1881838 :         return filemap_fdatawait_range(mapping, 0, i_size - 1);
     333                 :            : }
     334                 :            : EXPORT_SYMBOL(filemap_fdatawait);
     335                 :            : 
     336                 :          0 : int filemap_write_and_wait(struct address_space *mapping)
     337                 :            : {
     338                 :            :         int err = 0;
     339                 :            : 
     340         [ +  + ]:        345 :         if (mapping->nrpages) {
     341                 :            :                 err = filemap_fdatawrite(mapping);
     342                 :            :                 /*
     343                 :            :                  * Even if the above returned an error, the pages may have
     344                 :            :                  * been written partially (e.g. -ENOSPC), so we still wait.
     345                 :            :                  * But -EIO is a special case: it may indicate the worst
     346                 :            :                  * thing (e.g. a bug) happened, so we avoid waiting for it.
     347                 :            :                  */
     348         [ +  - ]:          4 :                 if (err != -EIO) {
     349                 :          4 :                         int err2 = filemap_fdatawait(mapping);
     350         [ +  - ]:          4 :                         if (!err)
     351                 :            :                                 err = err2;
     352                 :            :                 }
     353                 :            :         } else {
     354                 :        341 :                 err = filemap_check_errors(mapping);
     355                 :            :         }
     356                 :          0 :         return err;
     357                 :            : }
     358                 :            : EXPORT_SYMBOL(filemap_write_and_wait);
     359                 :            : 
     360                 :            : /**
     361                 :            :  * filemap_write_and_wait_range - write out & wait on a file range
     362                 :            :  * @mapping:    the address_space for the pages
     363                 :            :  * @lstart:     offset in bytes where the range starts
     364                 :            :  * @lend:       offset in bytes where the range ends (inclusive)
     365                 :            :  *
     366                 :            :  * Write out and wait upon file offsets lstart->lend, inclusive.
     367                 :            :  *
     368                 :            :  * Note that `lend' is inclusive (describes the last byte to be written) so
     369                 :            :  * that this function can be used to write to the very end-of-file (lend = -1).
     370                 :            :  */
     371                 :          0 : int filemap_write_and_wait_range(struct address_space *mapping,
     372                 :            :                                  loff_t lstart, loff_t lend)
     373                 :            : {
     374                 :            :         int err = 0;
     375                 :            : 
     376         [ +  + ]:     190938 :         if (mapping->nrpages) {
     377                 :      72907 :                 err = __filemap_fdatawrite_range(mapping, lstart, lend,
     378                 :            :                                                  WB_SYNC_ALL);
     379                 :            :                 /* See comment of filemap_write_and_wait() */
     380         [ +  + ]:      72803 :                 if (err != -EIO) {
     381                 :      72662 :                         int err2 = filemap_fdatawait_range(mapping,
     382                 :            :                                                 lstart, lend);
     383         [ +  + ]:      72918 :                         if (!err)
     384                 :            :                                 err = err2;
     385                 :            :                 }
     386                 :            :         } else {
     387                 :     118031 :                 err = filemap_check_errors(mapping);
     388                 :            :         }
     389                 :        152 :         return err;
     390                 :            : }
     391                 :            : EXPORT_SYMBOL(filemap_write_and_wait_range);
     392                 :            : 
     393                 :            : /**
     394                 :            :  * replace_page_cache_page - replace a pagecache page with a new one
     395                 :            :  * @old:        page to be replaced
     396                 :            :  * @new:        page to replace with
     397                 :            :  * @gfp_mask:   allocation mode
     398                 :            :  *
     399                 :            :  * This function replaces a page in the pagecache with a new one.  On
     400                 :            :  * success it acquires the pagecache reference for the new page and
     401                 :            :  * drops it for the old page.  Both the old and new pages must be
     402                 :            :  * locked.  This function does not add the new page to the LRU, the
     403                 :            :  * caller must do that.
     404                 :            :  *
     405                 :            :  * The remove + add is atomic.  The only way this function can fail is
     406                 :            :  * memory allocation failure.
     407                 :            :  */
     408                 :          0 : int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
     409                 :            : {
     410                 :            :         int error;
     411                 :            : 
     412                 :            :         VM_BUG_ON(!PageLocked(old));
     413                 :            :         VM_BUG_ON(!PageLocked(new));
     414                 :            :         VM_BUG_ON(new->mapping);
     415                 :            : 
     416                 :          0 :         error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
     417         [ #  # ]:          0 :         if (!error) {
     418                 :          0 :                 struct address_space *mapping = old->mapping;
     419                 :            :                 void (*freepage)(struct page *);
     420                 :            : 
     421                 :          0 :                 pgoff_t offset = old->index;
     422                 :          0 :                 freepage = mapping->a_ops->freepage;
     423                 :            : 
     424                 :            :                 page_cache_get(new);
     425                 :          0 :                 new->mapping = mapping;
     426                 :          0 :                 new->index = offset;
     427                 :            : 
     428                 :            :                 spin_lock_irq(&mapping->tree_lock);
     429                 :          0 :                 __delete_from_page_cache(old);
     430                 :          0 :                 error = radix_tree_insert(&mapping->page_tree, offset, new);
     431         [ #  # ]:          0 :                 BUG_ON(error);
     432                 :          0 :                 mapping->nrpages++;
     433                 :          0 :                 __inc_zone_page_state(new, NR_FILE_PAGES);
     434         [ #  # ]:          0 :                 if (PageSwapBacked(new))
     435                 :          0 :                         __inc_zone_page_state(new, NR_SHMEM);
     436                 :            :                 spin_unlock_irq(&mapping->tree_lock);
     437                 :            :                 /* mem_cgroup codes must not be called under tree_lock */
     438                 :            :                 mem_cgroup_replace_page_cache(old, new);
     439                 :            :                 radix_tree_preload_end();
     440         [ #  # ]:          0 :                 if (freepage)
     441                 :          0 :                         freepage(old);
     442                 :          0 :                 page_cache_release(old);
     443                 :            :         }
     444                 :            : 
     445                 :          0 :         return error;
     446                 :            : }
     447                 :            : EXPORT_SYMBOL_GPL(replace_page_cache_page);
     448                 :            : 
     449                 :            : /**
     450                 :            :  * add_to_page_cache_locked - add a locked page to the pagecache
     451                 :            :  * @page:       page to add
     452                 :            :  * @mapping:    the page's address_space
     453                 :            :  * @offset:     page index
     454                 :            :  * @gfp_mask:   page allocation mode
     455                 :            :  *
     456                 :            :  * This function is used to add a page to the pagecache. The page must be locked.
     457                 :            :  * This function does not add the page to the LRU.  The caller must do that.
     458                 :            :  */
     459                 :          0 : int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
     460                 :            :                 pgoff_t offset, gfp_t gfp_mask)
     461                 :            : {
     462                 :            :         int error;
     463                 :            : 
     464                 :            :         VM_BUG_ON(!PageLocked(page));
     465                 :            :         VM_BUG_ON(PageSwapBacked(page));
     466                 :            : 
     467                 :            :         error = mem_cgroup_cache_charge(page, current->mm,
     468                 :            :                                         gfp_mask & GFP_RECLAIM_MASK);
     469                 :            :         if (error)
     470                 :            :                 return error;
     471                 :            : 
     472                 :    2176710 :         error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
     473            [ + ]:    2176647 :         if (error) {
     474                 :            :                 mem_cgroup_uncharge_cache_page(page);
     475                 :            :                 return error;
     476                 :            :         }
     477                 :            : 
     478                 :            :         page_cache_get(page);
     479                 :    2176713 :         page->mapping = mapping;
     480                 :    2176713 :         page->index = offset;
     481                 :            : 
     482                 :            :         spin_lock_irq(&mapping->tree_lock);
     483                 :    2176732 :         error = radix_tree_insert(&mapping->page_tree, offset, page);
     484                 :            :         radix_tree_preload_end();
     485         [ +  + ]:    2176707 :         if (unlikely(error))
     486                 :            :                 goto err_insert;
     487                 :    2176665 :         mapping->nrpages++;
     488                 :    2176665 :         __inc_zone_page_state(page, NR_FILE_PAGES);
     489                 :            :         spin_unlock_irq(&mapping->tree_lock);
     490                 :            :         trace_mm_filemap_add_to_page_cache(page);
     491                 :            :         return 0;
     492                 :            : err_insert:
     493                 :         42 :         page->mapping = NULL;
     494                 :            :         /* Leave page->index set: truncation relies upon it */
     495                 :            :         spin_unlock_irq(&mapping->tree_lock);
     496                 :            :         mem_cgroup_uncharge_cache_page(page);
     497                 :         42 :         page_cache_release(page);
     498                 :         42 :         return error;
     499                 :            : }
     500                 :            : EXPORT_SYMBOL(add_to_page_cache_locked);
     501                 :            : 
     502                 :          0 : int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
     503                 :            :                                 pgoff_t offset, gfp_t gfp_mask)
     504                 :            : {
     505                 :            :         int ret;
     506                 :            : 
     507                 :            :         ret = add_to_page_cache(page, mapping, offset, gfp_mask);
     508         [ +  + ]:    2176734 :         if (ret == 0)
     509                 :            :                 lru_cache_add_file(page);
     510                 :          0 :         return ret;
     511                 :            : }
     512                 :            : EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
     513                 :            : 
     514                 :            : #ifdef CONFIG_NUMA
     515                 :            : struct page *__page_cache_alloc(gfp_t gfp)
     516                 :            : {
     517                 :            :         int n;
     518                 :            :         struct page *page;
     519                 :            : 
     520                 :            :         if (cpuset_do_page_mem_spread()) {
     521                 :            :                 unsigned int cpuset_mems_cookie;
     522                 :            :                 do {
     523                 :            :                         cpuset_mems_cookie = get_mems_allowed();
     524                 :            :                         n = cpuset_mem_spread_node();
     525                 :            :                         page = alloc_pages_exact_node(n, gfp, 0);
     526                 :            :                 } while (!put_mems_allowed(cpuset_mems_cookie) && !page);
     527                 :            : 
     528                 :            :                 return page;
     529                 :            :         }
     530                 :            :         return alloc_pages(gfp, 0);
     531                 :            : }
     532                 :            : EXPORT_SYMBOL(__page_cache_alloc);
     533                 :            : #endif
     534                 :            : 
     535                 :            : /*
     536                 :            :  * In order to wait for pages to become available there must be
     537                 :            :  * waitqueues associated with pages. By using a hash table of
     538                 :            :  * waitqueues where the bucket discipline is to maintain all
     539                 :            :  * waiters on the same queue and wake all when any of the pages
     540                 :            :  * become available, and for the woken contexts to check to be
     541                 :            :  * sure the appropriate page became available, this saves space
     542                 :            :  * at a cost of "thundering herd" phenomena during rare hash
     543                 :            :  * collisions.
     544                 :            :  */
     545                 :          0 : static wait_queue_head_t *page_waitqueue(struct page *page)
     546                 :            : {
     547                 :   77986748 :         const struct zone *zone = page_zone(page);
     548                 :            : 
     549                 :  155973496 :         return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)];
     550                 :            : }
     551                 :            : 
     552                 :            : static inline void wake_up_page(struct page *page, int bit)
     553                 :            : {
     554                 :   77773938 :         __wake_up_bit(page_waitqueue(page), &page->flags, bit);
     555                 :            : }
     556                 :            : 
     557                 :          0 : void wait_on_page_bit(struct page *page, int bit_nr)
     558                 :            : {
     559                 :     307592 :         DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
     560                 :            : 
     561         [ +  + ]:     153796 :         if (test_bit(bit_nr, &page->flags))
     562                 :     153282 :                 __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page,
     563                 :            :                                                         TASK_UNINTERRUPTIBLE);
     564                 :        185 : }
     565                 :            : EXPORT_SYMBOL(wait_on_page_bit);
     566                 :            : 
     567                 :          0 : int wait_on_page_bit_killable(struct page *page, int bit_nr)
     568                 :            : {
     569                 :      15922 :         DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
     570                 :            : 
     571         [ +  + ]:       7961 :         if (!test_bit(bit_nr, &page->flags))
     572                 :            :                 return 0;
     573                 :            : 
     574                 :       7573 :         return __wait_on_bit(page_waitqueue(page), &wait,
     575                 :            :                              sleep_on_page_killable, TASK_KILLABLE);
     576                 :            : }
     577                 :            : 
     578                 :            : /**
     579                 :            :  * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
     580                 :            :  * @page: Page defining the wait queue of interest
     581                 :            :  * @waiter: Waiter to add to the queue
     582                 :            :  *
     583                 :            :  * Add an arbitrary @waiter to the wait queue for the nominated @page.
     584                 :            :  */
     585                 :          0 : void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
     586                 :            : {
     587                 :            :         wait_queue_head_t *q = page_waitqueue(page);
     588                 :            :         unsigned long flags;
     589                 :            : 
     590                 :          0 :         spin_lock_irqsave(&q->lock, flags);
     591                 :            :         __add_wait_queue(q, waiter);
     592                 :            :         spin_unlock_irqrestore(&q->lock, flags);
     593                 :          0 : }
     594                 :            : EXPORT_SYMBOL_GPL(add_page_wait_queue);
     595                 :            : 
     596                 :            : /**
     597                 :            :  * unlock_page - unlock a locked page
     598                 :            :  * @page: the page
     599                 :            :  *
     600                 :            :  * Unlocks the page and wakes up sleepers in ___wait_on_page_locked().
     601                 :            :  * Also wakes sleepers in wait_on_page_writeback() because the wakeup
     602                 :            :  * mechanism between PageLocked pages and PageWriteback pages is shared.
     603                 :            :  * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
     604                 :            :  *
     605                 :            :  * The mb is necessary to enforce ordering between the clear_bit and the read
     606                 :            :  * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()).
     607                 :            :  */
     608                 :          0 : void unlock_page(struct page *page)
     609                 :            : {
     610                 :            :         VM_BUG_ON(!PageLocked(page));
     611                 :   76788984 :         clear_bit_unlock(PG_locked, &page->flags);
     612                 :   76808942 :         smp_mb__after_clear_bit();
     613                 :            :         wake_up_page(page, PG_locked);
     614                 :   76781001 : }
     615                 :            : EXPORT_SYMBOL(unlock_page);
     616                 :            : 
     617                 :            : /**
     618                 :            :  * end_page_writeback - end writeback against a page
     619                 :            :  * @page: the page
     620                 :            :  */
     621                 :          0 : void end_page_writeback(struct page *page)
     622                 :            : {
     623         [ +  + ]:     988652 :         if (TestClearPageReclaim(page))
     624                 :        516 :                 rotate_reclaimable_page(page);
     625                 :            : 
     626         [ -  + ]:     988652 :         if (!test_clear_page_writeback(page))
     627                 :          0 :                 BUG();
     628                 :            : 
     629                 :     988652 :         smp_mb__after_clear_bit();
     630                 :            :         wake_up_page(page, PG_writeback);
     631                 :     988652 : }
     632                 :            : EXPORT_SYMBOL(end_page_writeback);
     633                 :            : 
     634                 :            : /**
     635                 :            :  * __lock_page - get a lock on the page, assuming we need to sleep to get it
     636                 :            :  * @page: the page to lock
     637                 :            :  */
     638                 :          0 : void __lock_page(struct page *page)
     639                 :            : {
     640                 :      25352 :         DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
     641                 :            : 
     642                 :      12676 :         __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
     643                 :            :                                                         TASK_UNINTERRUPTIBLE);
     644                 :      12675 : }
     645                 :            : EXPORT_SYMBOL(__lock_page);
     646                 :            : 
     647                 :          0 : int __lock_page_killable(struct page *page)
     648                 :            : {
     649                 :      58522 :         DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
     650                 :            : 
     651                 :      29261 :         return __wait_on_bit_lock(page_waitqueue(page), &wait,
     652                 :            :                                         sleep_on_page_killable, TASK_KILLABLE);
     653                 :            : }
     654                 :            : EXPORT_SYMBOL_GPL(__lock_page_killable);
     655                 :            : 
     656                 :          0 : int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
     657                 :            :                          unsigned int flags)
     658                 :            : {
     659         [ +  + ]:      11156 :         if (flags & FAULT_FLAG_ALLOW_RETRY) {
     660                 :            :                 /*
     661                 :            :                  * CAUTION! In this case, mmap_sem is not released
     662                 :            :                  * even though we return 0.
     663                 :            :                  */
     664         [ +  - ]:      11142 :                 if (flags & FAULT_FLAG_RETRY_NOWAIT)
     665                 :            :                         return 0;
     666                 :            : 
     667                 :      11142 :                 up_read(&mm->mmap_sem);
     668         [ +  + ]:      11142 :                 if (flags & FAULT_FLAG_KILLABLE)
     669                 :            :                         wait_on_page_locked_killable(page);
     670                 :            :                 else
     671                 :            :                         wait_on_page_locked(page);
     672                 :            :                 return 0;
     673                 :            :         } else {
     674         [ +  - ]:         14 :                 if (flags & FAULT_FLAG_KILLABLE) {
     675                 :            :                         int ret;
     676                 :            : 
     677                 :         14 :                         ret = __lock_page_killable(page);
     678         [ -  + ]:         14 :                         if (ret) {
     679                 :          0 :                                 up_read(&mm->mmap_sem);
     680                 :          0 :                                 return 0;
     681                 :            :                         }
     682                 :            :                 } else
     683                 :          0 :                         __lock_page(page);
     684                 :            :                 return 1;
     685                 :            :         }
     686                 :            : }
     687                 :            : 
     688                 :            : /**
     689                 :            :  * find_get_page - find and get a page reference
     690                 :            :  * @mapping: the address_space to search
     691                 :            :  * @offset: the page index
     692                 :            :  *
     693                 :            :  * Is there a pagecache struct page at the given (mapping, offset) tuple?
     694                 :            :  * If yes, increment its refcount and return it; if no, return NULL.
     695                 :            :  */
     696                 :          0 : struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
     697                 :            : {
     698                 :            :         void **pagep;
     699                 :            :         struct page *page;
     700                 :            : 
     701                 :            :         rcu_read_lock();
     702                 :            : repeat:
     703                 :            :         page = NULL;
     704                 :   72060995 :         pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
     705         [ +  + ]:   72058697 :         if (pagep) {
     706                 :            :                 page = radix_tree_deref_slot(pagep);
     707         [ +  + ]:   67387212 :                 if (unlikely(!page))
     708                 :            :                         goto out;
     709         [ -  + ]:   67384042 :                 if (radix_tree_exception(page)) {
     710         [ #  # ]:          0 :                         if (radix_tree_deref_retry(page))
     711                 :            :                                 goto repeat;
     712                 :            :                         /*
     713                 :            :                          * Otherwise, shmem/tmpfs must be storing a swap entry
     714                 :            :                          * here as an exceptional entry: so return it without
     715                 :            :                          * attempting to raise page count.
     716                 :            :                          */
     717                 :            :                         goto out;
     718                 :            :                 }
     719            [ + ]:   67388891 :                 if (!page_cache_get_speculative(page))
     720                 :            :                         goto repeat;
     721                 :            : 
     722                 :            :                 /*
     723                 :            :                  * Has the page moved?
     724                 :            :                  * This is part of the lockless pagecache protocol. See
     725                 :            :                  * include/linux/pagemap.h for details.
     726                 :            :                  */
     727         [ -  + ]:   67389105 :                 if (unlikely(page != *pagep)) {
     728                 :          0 :                         page_cache_release(page);
     729                 :          0 :                         goto repeat;
     730                 :            :                 }
     731                 :            :         }
     732                 :            : out:
     733                 :            :         rcu_read_unlock();
     734                 :            : 
     735                 :   72061782 :         return page;
     736                 :            : }
     737                 :            : EXPORT_SYMBOL(find_get_page);
     738                 :            : 
     739                 :            : /**
     740                 :            :  * find_lock_page - locate, pin and lock a pagecache page
     741                 :            :  * @mapping: the address_space to search
     742                 :            :  * @offset: the page index
     743                 :            :  *
     744                 :            :  * Locates the desired pagecache page, locks it, increments its reference
     745                 :            :  * count and returns its address.
     746                 :            :  *
     747                 :            :  * Returns NULL if the page was not present. find_lock_page() may sleep.
     748                 :            :  */
     749                 :    7266301 : struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
     750                 :            : {
     751                 :            :         struct page *page;
     752                 :            : 
     753                 :            : repeat:
     754                 :    7266301 :         page = find_get_page(mapping, offset);
     755 [ +  + ][ +  + ]:    7266586 :         if (page && !radix_tree_exception(page)) {
     756                 :            :                 lock_page(page);
     757                 :            :                 /* Has the page been truncated? */
     758         [ -  + ]:    5509302 :                 if (unlikely(page->mapping != mapping)) {
     759                 :          0 :                         unlock_page(page);
     760                 :          0 :                         page_cache_release(page);
     761                 :          0 :                         goto repeat;
     762                 :            :                 }
     763                 :            :                 VM_BUG_ON(page->index != offset);
     764                 :            :         }
     765                 :        379 :         return page;
     766                 :            : }
     767                 :            : EXPORT_SYMBOL(find_lock_page);
     768                 :            : 
     769                 :            : /**
     770                 :            :  * find_or_create_page - locate or add a pagecache page
     771                 :            :  * @mapping: the page's address_space
     772                 :            :  * @index: the page's index into the mapping
     773                 :            :  * @gfp_mask: page allocation mode
     774                 :            :  *
     775                 :            :  * Locates a page in the pagecache.  If the page is not present, a new page
     776                 :            :  * is allocated using @gfp_mask and is added to the pagecache and to the VM's
     777                 :            :  * LRU list.  The returned page is locked and has its reference count
     778                 :            :  * incremented.
     779                 :            :  *
     780                 :            :  * find_or_create_page() may sleep, even if @gfp_mask specifies an atomic
     781                 :            :  * allocation!
     782                 :            :  *
     783                 :            :  * find_or_create_page() returns the desired page's address, or NULL on
     784                 :            :  * memory exhaustion.
     785                 :            :  */
     786                 :      80295 : struct page *find_or_create_page(struct address_space *mapping,
     787                 :            :                 pgoff_t index, gfp_t gfp_mask)
     788                 :            : {
     789                 :            :         struct page *page;
     790                 :            :         int err;
     791                 :            : repeat:
     792                 :      80295 :         page = find_lock_page(mapping, index);
     793         [ +  + ]:      80295 :         if (!page) {
     794                 :            :                 page = __page_cache_alloc(gfp_mask);
     795            [ + ]:      64737 :                 if (!page)
     796                 :            :                         return NULL;
     797                 :            :                 /*
     798                 :            :                  * We want a regular kernel memory (not highmem or DMA etc)
     799                 :            :                  * allocation for the radix tree nodes, but we need to honour
     800                 :            :                  * the context-specific requirements the caller has asked for.
     801                 :            :                  * GFP_RECLAIM_MASK collects those requirements.
     802                 :            :                  */
     803                 :      64738 :                 err = add_to_page_cache_lru(page, mapping, index,
     804                 :            :                         (gfp_mask & GFP_RECLAIM_MASK));
     805         [ -  + ]:      64738 :                 if (unlikely(err)) {
     806                 :          0 :                         page_cache_release(page);
     807                 :            :                         page = NULL;
     808         [ #  # ]:          0 :                         if (err == -EEXIST)
     809                 :            :                                 goto repeat;
     810                 :            :                 }
     811                 :            :         }
     812                 :      80295 :         return page;
     813                 :            : }
     814                 :            : EXPORT_SYMBOL(find_or_create_page);
     815                 :            : 
     816                 :            : /**
     817                 :            :  * find_get_pages - gang pagecache lookup
     818                 :            :  * @mapping:    The address_space to search
     819                 :            :  * @start:      The starting page index
     820                 :            :  * @nr_pages:   The maximum number of pages
     821                 :            :  * @pages:      Where the resulting pages are placed
     822                 :            :  *
     823                 :            :  * find_get_pages() will search for and return a group of up to
     824                 :            :  * @nr_pages pages in the mapping.  The pages are placed at @pages.
     825                 :            :  * find_get_pages() takes a reference against the returned pages.
     826                 :            :  *
     827                 :            :  * The search returns a group of mapping-contiguous pages with ascending
     828                 :            :  * indexes.  There may be holes in the indices due to not-present pages.
     829                 :            :  *
     830                 :            :  * find_get_pages() returns the number of pages which were found.
     831                 :            :  */
     832                 :          0 : unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
     833                 :            :                             unsigned int nr_pages, struct page **pages)
     834                 :            : {
     835                 :            :         struct radix_tree_iter iter;
     836                 :            :         void **slot;
     837                 :            :         unsigned ret = 0;
     838                 :            : 
     839            [ + ]:     555827 :         if (unlikely(!nr_pages))
     840                 :            :                 return 0;
     841                 :            : 
     842                 :            :         rcu_read_lock();
     843                 :            : restart:
     844 [ +  + ][ +  + ]:    3835841 :         radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
     845                 :            :                 struct page *page;
     846                 :            : repeat:
     847                 :            :                 page = radix_tree_deref_slot(slot);
     848            [ + ]:    3498539 :                 if (unlikely(!page))
     849                 :          0 :                         continue;
     850                 :            : 
     851         [ -  + ]:    3499168 :                 if (radix_tree_exception(page)) {
     852         [ #  # ]:          0 :                         if (radix_tree_deref_retry(page)) {
     853                 :            :                                 /*
     854                 :            :                                  * Transient condition which can only trigger
     855                 :            :                                  * when entry at index 0 moves out of or back
     856                 :            :                                  * to root: none yet gotten, safe to restart.
     857                 :            :                                  */
     858         [ #  # ]:          0 :                                 WARN_ON(iter.index);
     859                 :            :                                 goto restart;
     860                 :            :                         }
     861                 :            :                         /*
     862                 :            :                          * Otherwise, shmem/tmpfs must be storing a swap entry
     863                 :            :                          * here as an exceptional entry: so skip over it -
     864                 :            :                          * we only reach this from invalidate_mapping_pages().
     865                 :            :                          */
     866                 :          0 :                         continue;
     867                 :            :                 }
     868                 :            : 
     869            [ + ]:    3499325 :                 if (!page_cache_get_speculative(page))
     870                 :            :                         goto repeat;
     871                 :            : 
     872                 :            :                 /* Has the page moved? */
     873         [ -  + ]:    3500631 :                 if (unlikely(page != *slot)) {
     874                 :          0 :                         page_cache_release(page);
     875                 :    3499845 :                         goto repeat;
     876                 :            :                 }
     877                 :            : 
     878                 :    3500631 :                 pages[ret] = page;
     879            [ + ]:    3500631 :                 if (++ret == nr_pages)
     880                 :            :                         break;
     881                 :            :         }
     882                 :            : 
     883                 :            :         rcu_read_unlock();
     884                 :     555853 :         return ret;
     885                 :            : }
     886                 :            : 
     887                 :            : /**
     888                 :            :  * find_get_pages_contig - gang contiguous pagecache lookup
     889                 :            :  * @mapping:    The address_space to search
     890                 :            :  * @index:      The starting page index
     891                 :            :  * @nr_pages:   The maximum number of pages
     892                 :            :  * @pages:      Where the resulting pages are placed
     893                 :            :  *
     894                 :            :  * find_get_pages_contig() works like find_get_pages(), except that the
     895                 :            :  * pages returned are guaranteed to be contiguous, starting at @index.
     896                 :            :  *
     897                 :            :  * Returns the number of pages found; 0 if @nr_pages is 0 or @index is a hole.
     898                 :            :  */
     899                 :          0 : unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
     900                 :            :                                unsigned int nr_pages, struct page **pages)
     901                 :            : {
     902                 :            :         struct radix_tree_iter iter;
     903                 :            :         void **slot;
     904                 :            :         unsigned int ret = 0;
     905                 :            : 
     906         [ +  - ]:         22 :         if (unlikely(!nr_pages))
     907                 :            :                 return 0;
     908                 :            : 
     909                 :            :         rcu_read_lock();
     910                 :            : restart:
     911 [ +  - ][ +  - ]:         22 :         radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
     912                 :            :                 struct page *page;
     913                 :            : repeat:
     914                 :            :                 page = radix_tree_deref_slot(slot);
     915                 :            :                 /* A hole ends the contiguous run: no reason to continue */
     916         [ #  # ]:         22 :                 if (unlikely(!page))
     917                 :            :                         break;
     918                 :            : 
     919         [ -  + ]:         22 :                 if (radix_tree_exception(page)) {
     920         [ #  # ]:          0 :                         if (radix_tree_deref_retry(page)) {
     921                 :            :                                 /*
     922                 :            :                                  * Transient condition which can only trigger
     923                 :            :                                  * when entry at index 0 moves out of or back
     924                 :            :                                  * to root: no page taken yet, safe to restart.
     925                 :            :                                  */
     926                 :            :                                 goto restart;
     927                 :            :                         }
     928                 :            :                         /*
     929                 :            :                          * Otherwise, shmem/tmpfs must be storing a swap entry
     930                 :            :                          * here as an exceptional entry: so stop looking for
     931                 :            :                          * contiguous pages.
     932                 :            :                          */
     933                 :            :                         break;
     934                 :            :                 }
     935                 :            : 
     936         [ -  + ]:         22 :                 if (!page_cache_get_speculative(page))
     937                 :            :                         goto repeat;
     938                 :            : 
     939                 :            :                 /* Has the page moved?  If so, drop it and look again. */
     940         [ -  + ]:         22 :                 if (unlikely(page != *slot)) {
     941                 :          0 :                         page_cache_release(page);
     942                 :         22 :                         goto repeat;
     943                 :            :                 }
     944                 :            : 
     945                 :            :                 /*
     946                 :            :                  * Must check mapping and index after taking the ref, else
     947                 :            :                  * the caller can see false positives and negatives.  A page
     948                 :            :                  * with no mapping, or at another index, ends the run.
     949                 :            :                  */
     950 [ +  - ][ -  + ]:         22 :                 if (page->mapping == NULL || page->index != iter.index) {
     951                 :          0 :                         page_cache_release(page);
     952                 :          0 :                         break;
     953                 :            :                 }
     954                 :            : 
     955                 :         22 :                 pages[ret] = page;
     956         [ -  + ]:         22 :                 if (++ret == nr_pages)
     957                 :            :                         break;
     958                 :            :         }
     959                 :            :         rcu_read_unlock();
     960                 :         22 :         return ret;
     961                 :            : }
     962                 :            : EXPORT_SYMBOL(find_get_pages_contig);
     963                 :            : 
     964                 :            : /**
     965                 :            :  * find_get_pages_tag - find and return pages that match @tag
     966                 :            :  * @mapping:    the address_space to search
     967                 :            :  * @index:      the starting page index (updated on success, see below)
     968                 :            :  * @tag:        the radix-tree tag index to match
     969                 :            :  * @nr_pages:   the maximum number of pages
     970                 :            :  * @pages:      where the resulting pages are placed
     971                 :            :  *
     972                 :            :  * Like find_get_pages(), but returns only pages tagged with @tag; the return
     973                 :            :  * value is the page count; if non-zero, @index is moved past the last page.
     974                 :            :  */
     975                 :          0 : unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
     976                 :            :                         int tag, unsigned int nr_pages, struct page **pages)
     977                 :            : {
     978                 :            :         struct radix_tree_iter iter;
     979                 :            :         void **slot;
     980                 :            :         unsigned ret = 0;
     981                 :            : 
     982         [ +  + ]:    2231148 :         if (unlikely(!nr_pages))
     983                 :            :                 return 0;
     984                 :            : 
     985                 :            :         rcu_read_lock();
     986                 :            : restart:
     987 [ +  + ][ +  + ]:    4144077 :         radix_tree_for_each_tagged(slot, &mapping->page_tree,
     988                 :            :                                    &iter, *index, tag) {
     989                 :            :                 struct page *page;
     990                 :            : repeat:
     991                 :            :                 page = radix_tree_deref_slot(slot);
     992            [ + ]:    2034479 :                 if (unlikely(!page))
     993                 :          0 :                         continue;
     994                 :            : 
     995         [ -  + ]:    2034810 :                 if (radix_tree_exception(page)) {
     996         [ #  # ]:          0 :                         if (radix_tree_deref_retry(page)) {
     997                 :            :                                 /*
     998                 :            :                                  * Transient condition which can only trigger
     999                 :            :                                  * when entry at index 0 moves out of or back
    1000                 :            :                                  * to root: no page taken yet, safe to restart.
    1001                 :            :                                  */
    1002                 :            :                                 goto restart;
    1003                 :            :                         }
    1004                 :            :                         /*
    1005                 :            :                          * This function is never used on a shmem/tmpfs
    1006                 :            :                          * mapping, so a swap entry must not be found here.
    1007                 :            :                          */
    1008                 :          0 :                         BUG();
    1009                 :            :                 }
    1010                 :            : 
    1011            [ + ]:    2034838 :                 if (!page_cache_get_speculative(page))
    1012                 :            :                         goto repeat;
    1013                 :            : 
    1014                 :            :                 /* Has the page moved?  If so, drop it and look again. */
    1015         [ -  + ]:    2035675 :                 if (unlikely(page != *slot)) {
    1016                 :          0 :                         page_cache_release(page);
    1017                 :    2035316 :                         goto repeat;
    1018                 :            :                 }
    1019                 :            : 
    1020                 :    2035675 :                 pages[ret] = page;
    1021            [ + ]:    2035675 :                 if (++ret == nr_pages)
    1022                 :            :                         break;
    1023                 :            :         }
    1024                 :            : 
    1025                 :            :         rcu_read_unlock();
    1026                 :            : 
    1027         [ +  + ]:    2230650 :         if (ret)
    1028                 :     247649 :                 *index = pages[ret - 1]->index + 1;
    1029                 :            : 
    1030                 :    2230650 :         return ret;
    1031                 :            : }
    1032                 :            : EXPORT_SYMBOL(find_get_pages_tag);
    1033                 :            : 
    1034                 :            : /**
    1035                 :            :  * grab_cache_page_nowait - returns locked page at given index in given cache
    1036                 :            :  * @mapping: target address_space
    1037                 :            :  * @index: the page index
    1038                 :            :  *
    1039                 :            :  * Same as grab_cache_page(), but do not wait if the page is unavailable:
    1040                 :            :  * NULL is returned if the page is locked or a new one cannot be set up.
    1041                 :            :  * Intended for speculative data generators, which can regenerate the data.
    1042                 :            :  * This routine should be safe to call while holding another page's lock.
    1043                 :            :  *
    1044                 :            :  * Clear __GFP_FS when allocating the page to avoid recursion into the fs
    1045                 :            :  * and deadlock against the caller's locked page.
    1046                 :            :  */
    1047                 :            : struct page *
    1048                 :          0 : grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
    1049                 :            : {
    1050                 :          0 :         struct page *page = find_get_page(mapping, index);
    1051                 :            : 
    1052         [ #  # ]:          0 :         if (page) {
    1053         [ #  # ]:          0 :                 if (trylock_page(page))
    1054                 :            :                         return page;
    1055                 :          0 :                 page_cache_release(page);       /* trylock failed: don't wait */
    1056                 :          0 :                 return NULL;
    1057                 :            :         }
    1058                 :          0 :         page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS);
    1059 [ #  # ][ #  # ]:          0 :         if (page && add_to_page_cache_lru(page, mapping, index, GFP_NOFS)) {
    1060                 :          0 :                 page_cache_release(page);       /* insertion failed */
    1061                 :            :                 page = NULL;
    1062                 :            :         }
    1063                 :          0 :         return page;
    1064                 :            : }
    1065                 :            : EXPORT_SYMBOL(grab_cache_page_nowait);
    1066                 :            : 
    1067                 :            : /*
    1068                 :            :  * CD/DVDs are error prone. When a medium error occurs, the driver may fail
    1069                 :            :  * a _large_ part of the i/o request. Imagine the worst scenario:
    1070                 :            :  *
    1071                 :            :  *      ---R__________________________________________B__________
    1072                 :            :  *         ^ reading here                             ^ bad block(assume 4k)
    1073                 :            :  *
    1074                 :            :  * read(R) => miss => readahead(R...B) => media error => frustrating retries
    1075                 :            :  * => failing the whole request => read(R) => read(R+1) =>
    1076                 :            :  * readahead(R+1...B+1) => bang => read(R+2) => read(R+3) =>
    1077                 :            :  * readahead(R+3...B+2) => bang => read(R+3) => read(R+4) =>
    1078                 :            :  * readahead(R+4...B+3) => bang => read(R+4) => read(R+5) => ......
    1079                 :            :  *
    1080                 :            :  * This is insane. Fix it by quickly scaling down the readahead size.
    1081                 :            :  */
    1082                 :            : static void shrink_readahead_size_eio(struct file *filp,
    1083                 :            :                                         struct file_ra_state *ra)
    1084                 :            : {
    1085                 :          0 :         ra->ra_pages /= 4;      /* quarter it per call; @filp is not used */
    1086                 :            : }
    1087                 :            : 
    1088                 :            : /**
    1089                 :            :  * do_generic_file_read - generic file read routine
    1090                 :            :  * @filp:       the file to read
    1091                 :            :  * @ppos:       current file position
    1092                 :            :  * @desc:       read_descriptor
    1093                 :            :  *
    1094                 :            :  * This is a generic file read routine, and uses the
    1095                 :            :  * mapping->a_ops->readpage() function for the actual low-level stuff.
    1096                 :            :  *
    1097                 :            :  * This is really ugly. But the goto's actually try to clarify some
    1098                 :            :  * of the logic when it comes to error handling etc.
    1099                 :            :  */
    1100                 :          0 : static void do_generic_file_read(struct file *filp, loff_t *ppos,
    1101                 :            :                 read_descriptor_t *desc)
    1102                 :            : {
    1103                 :   35630806 :         struct address_space *mapping = filp->f_mapping;
    1104                 :   16044244 :         struct inode *inode = mapping->host;
    1105                 :   16044244 :         struct file_ra_state *ra = &filp->f_ra;
    1106                 :            :         pgoff_t index;
    1107                 :            :         pgoff_t last_index;
    1108                 :            :         pgoff_t prev_index;
    1109                 :            :         unsigned long offset;      /* offset into pagecache page */
    1110                 :            :         unsigned int prev_offset;
    1111                 :            :         int error;
    1112                 :            : 
    1113                 :   16044244 :         index = *ppos >> PAGE_CACHE_SHIFT;
    1114                 :   16044244 :         prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
    1115                 :   16044244 :         prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
    1116                 :   16044244 :         last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
    1117                 :   20015325 :         offset = *ppos & ~PAGE_CACHE_MASK;
    1118                 :            : 
    1119                 :            :         for (;;) {
    1120                 :            :                 struct page *page;
    1121                 :            :                 pgoff_t end_index;
    1122                 :            :                 loff_t isize;
    1123                 :            :                 unsigned long nr, ret;
    1124                 :            : 
    1125                 :   20015325 :                 cond_resched();
    1126                 :            : find_page:
    1127                 :   20018822 :                 page = find_get_page(mapping, index);
    1128         [ +  + ]:   20019022 :                 if (!page) {
    1129                 :     133017 :                         page_cache_sync_readahead(mapping,
    1130                 :            :                                         ra, filp,
    1131                 :            :                                         index, last_index - index);
    1132                 :     133024 :                         page = find_get_page(mapping, index);
    1133         [ +  + ]:     133035 :                         if (unlikely(page == NULL))
    1134                 :            :                                 goto no_cached_page;
    1135                 :            :                 }
    1136         [ +  + ]:   19990802 :                 if (PageReadahead(page)) {
    1137                 :      16377 :                         page_cache_async_readahead(mapping,
    1138                 :            :                                         ra, filp, page,
    1139                 :            :                                         index, last_index - index);
    1140                 :            :                 }
    1141         [ +  + ]:   19991361 :                 if (!PageUptodate(page)) {
    1142 [ -  + ][ #  # ]:      29617 :                         if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
    1143                 :          0 :                                         !mapping->a_ops->is_partially_uptodate)
    1144                 :            :                                 goto page_not_up_to_date;
    1145         [ #  # ]:          0 :                         if (!trylock_page(page))
    1146                 :            :                                 goto page_not_up_to_date;
    1147                 :            :                         /* Did it get truncated before we got the lock? */
    1148         [ #  # ]:          0 :                         if (!page->mapping)
    1149                 :            :                                 goto page_not_up_to_date_locked;
    1150         [ #  # ]:          0 :                         if (!mapping->a_ops->is_partially_uptodate(page,
    1151                 :            :                                                                 desc, offset))
    1152                 :            :                                 goto page_not_up_to_date_locked;
    1153                 :   20019646 :                         unlock_page(page);
    1154                 :            :                 }
    1155                 :            : page_ok:
    1156                 :            :                 /*
    1157                 :            :                  * i_size must be checked after we know the page is Uptodate.
    1158                 :            :                  *
    1159                 :            :                  * Checking i_size after the check allows us to calculate
    1160                 :            :                  * the correct value for "nr", which means the zero-filled
    1161                 :            :                  * part of the page is not copied back to userspace (unless
    1162                 :            :                  * another truncate extends the file - this is desired though).
    1163                 :            :                  */
    1164                 :            : 
    1165                 :            :                 isize = i_size_read(inode);
    1166                 :   20018998 :                 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
    1167         [ +  + ]:   20018998 :                 if (unlikely(!isize || index > end_index)) {
    1168                 :      28046 :                         page_cache_release(page);
    1169                 :      28046 :                         goto out;
    1170                 :            :                 }
    1171                 :            : 
    1172                 :            :                 /* nr is the maximum number of bytes to copy from this page */
    1173                 :            :                 nr = PAGE_CACHE_SIZE;
    1174         [ +  + ]:   19990952 :                 if (index == end_index) {
    1175                 :    1778632 :                         nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
    1176         [ +  + ]:    1778632 :                         if (nr <= offset) {
    1177                 :     404390 :                                 page_cache_release(page);
    1178                 :     404394 :                                 goto out;
    1179                 :            :                         }
    1180                 :            :                 }
    1181                 :   19586562 :                 nr = nr - offset;
    1182                 :            : 
    1183                 :            :                 /* If users can be writing to this page using arbitrary
    1184                 :            :                  * virtual addresses, take care about potential aliasing
    1185                 :            :                  * before reading the page on the kernel side.
    1186                 :            :                  */
    1187         [ +  + ]:   19586562 :                 if (mapping_writably_mapped(mapping))
    1188                 :    1865722 :                         flush_dcache_page(page);
    1189                 :            : 
    1190                 :            :                 /*
    1191                 :            :                  * When a sequential read accesses a page several times,
    1192                 :            :                  * only mark it as accessed the first time.
    1193                 :            :                  */
    1194         [ +  + ]:   19587183 :                 if (prev_index != index || offset != prev_offset)
    1195                 :    8580759 :                         mark_page_accessed(page);
    1196                 :            :                 prev_index = index;
    1197                 :            : 
    1198                 :            :                 /*
    1199                 :            :                  * Ok, we have the page, and it's up-to-date, so
    1200                 :            :                  * now we can copy it to user space...
    1201                 :            :                  *
    1202                 :            :                  * The file_read_actor routine returns how many bytes were
    1203                 :            :                  * actually used..
    1204                 :            :                  * NOTE! This may not be the same as how much of a user buffer
    1205                 :            :                  * we filled up (we may be padding etc), so we can only update
    1206                 :            :                  * "pos" here (the actor routine has to update the user buffer
    1207                 :            :                  * pointers and the remaining count).
    1208                 :            :                  */
    1209                 :   19586794 :                 ret = file_read_actor(desc, page, offset, nr);
    1210                 :   19586833 :                 offset += ret;
    1211                 :   19586833 :                 index += offset >> PAGE_CACHE_SHIFT;
    1212                 :   19586833 :                 offset &= ~PAGE_CACHE_MASK;
    1213                 :            :                 prev_offset = offset;
    1214                 :            : 
    1215                 :   19586833 :                 page_cache_release(page);
    1216 [ +  + ][ +  + ]:   19587298 :                 if (ret == nr && desc->count)
    1217                 :    3971081 :                         continue;
    1218                 :            :                 goto out;
    1219                 :            : 
    1220                 :            : page_not_up_to_date:
    1221                 :            :                 /* Get exclusive access to the page ... */
    1222                 :            :                 error = lock_page_killable(page);
    1223         [ +  - ]:      29617 :                 if (unlikely(error))
    1224                 :            :                         goto readpage_error;
    1225                 :            : 
    1226                 :            : page_not_up_to_date_locked:
    1227                 :            :                 /* Did it get truncated before we got the lock? */
    1228         [ -  + ]:      29617 :                 if (!page->mapping) {
    1229                 :          0 :                         unlock_page(page);
    1230                 :          0 :                         page_cache_release(page);
    1231                 :          0 :                         continue;
    1232                 :            :                 }
    1233                 :            : 
    1234                 :            :                 /* Did somebody else fill it already? */
    1235         [ +  + ]:      29617 :                 if (PageUptodate(page)) {
    1236                 :      29247 :                         unlock_page(page);
    1237                 :      29247 :                         goto page_ok;
    1238                 :            :                 }
    1239                 :            : 
    1240                 :            : readpage:
    1241                 :            :                 /*
    1242                 :            :                  * A previous I/O error may have been due to temporary
    1243                 :            :                  * failures, eg. multipath errors.
    1244                 :            :                  * PG_error will be set again if readpage fails.
    1245                 :            :                  */
    1246                 :            :                 ClearPageError(page);
    1247                 :            :                 /* Start the actual read. The read will unlock the page. */
    1248                 :      28608 :                 error = mapping->a_ops->readpage(filp, page);
    1249                 :            : 
    1250         [ -  + ]:      28605 :                 if (unlikely(error)) {
    1251         [ #  # ]:          0 :                         if (error == AOP_TRUNCATED_PAGE) {
    1252                 :          0 :                                 page_cache_release(page);
    1253                 :          0 :                                 goto find_page;
    1254                 :            :                         }
    1255                 :            :                         goto readpage_error;
    1256                 :            :                 }
    1257                 :            : 
    1258         [ +  - ]:      28605 :                 if (!PageUptodate(page)) {
    1259                 :            :                         error = lock_page_killable(page);
    1260         [ #  # ]:          0 :                         if (unlikely(error))
    1261                 :            :                                 goto readpage_error;
    1262         [ #  # ]:          0 :                         if (!PageUptodate(page)) {
    1263         [ #  # ]:          0 :                                 if (page->mapping == NULL) {
    1264                 :            :                                         /*
    1265                 :            :                                          * invalidate_mapping_pages got it
    1266                 :            :                                          */
    1267                 :          0 :                                         unlock_page(page);
    1268                 :          0 :                                         page_cache_release(page);
    1269                 :          0 :                                         goto find_page;
    1270                 :            :                                 }
    1271                 :          0 :                                 unlock_page(page);
    1272                 :            :                                 shrink_readahead_size_eio(filp, ra);
    1273                 :            :                                 error = -EIO;
    1274                 :          0 :                                 goto readpage_error;
    1275                 :            :                         }
    1276                 :          0 :                         unlock_page(page);
    1277                 :            :                 }
    1278                 :            : 
    1279                 :            :                 goto page_ok;
    1280                 :            : 
    1281                 :            : readpage_error:
    1282                 :            :                 /* UHHUH! A synchronous read error occurred. Report it */
    1283                 :          0 :                 desc->error = error;
    1284                 :          0 :                 page_cache_release(page);
    1285                 :          0 :                 goto out;
    1286                 :            : 
    1287                 :            : no_cached_page:
    1288                 :            :                 /*
    1289                 :            :                  * Ok, it wasn't cached, so we need to create a new
    1290                 :            :                  * page..
    1291                 :            :                  */
    1292                 :            :                 page = page_cache_alloc_cold(mapping);
    1293         [ -  + ]:      28238 :                 if (!page) {
    1294                 :          0 :                         desc->error = -ENOMEM;
    1295                 :          0 :                         goto out;
    1296                 :            :                 }
    1297                 :      28238 :                 error = add_to_page_cache_lru(page, mapping,
    1298                 :            :                                                 index, GFP_KERNEL);
    1299         [ +  - ]:      28238 :                 if (error) {
    1300                 :          0 :                         page_cache_release(page);
    1301         [ #  # ]:          0 :                         if (error == -EEXIST)
    1302                 :            :                                 goto find_page;
    1303                 :          0 :                         desc->error = error;
    1304                 :          0 :                         goto out;
    1305                 :            :                 }
    1306                 :            :                 goto readpage;
    1307                 :            :         }
    1308                 :            : 
    1309                 :            : out:
    1310                 :   16048657 :         ra->prev_pos = prev_index;
    1311                 :   16048657 :         ra->prev_pos <<= PAGE_CACHE_SHIFT;
    1312                 :   16048657 :         ra->prev_pos |= prev_offset;
    1313                 :            : 
    1314                 :   16048657 :         *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
    1315                 :            :         file_accessed(filp);
    1316                 :   16048669 : }
    1317                 :            : 
    1318                 :          0 : int file_read_actor(read_descriptor_t *desc, struct page *page,
    1319                 :            :                         unsigned long offset, unsigned long size)
    1320                 :            : {
    1321                 :            :         char *kaddr;
    1322                 :   19593630 :         unsigned long left, count = desc->count;
    1323                 :            : 
    1324         [ +  + ]:   19593630 :         if (size > count)
    1325                 :            :                 size = count;
    1326                 :            : 
    1327                 :            :         /*
    1328                 :            :          * Faults on the destination of a read are common, so do it before
    1329                 :            :          * taking the kmap.
    1330                 :            :          */
    1331         [ +  + ]:   19593388 :         if (!fault_in_pages_writeable(desc->arg.buf, size)) {
    1332                 :   19593385 :                 kaddr = kmap_atomic(page);
    1333                 :   19594172 :                 left = __copy_to_user_inatomic(desc->arg.buf,
    1334                 :            :                                                 kaddr + offset, size);
    1335                 :   19593724 :                 kunmap_atomic(kaddr);
    1336         [ -  + ]:   19593790 :                 if (left == 0)
    1337                 :            :                         goto success;
    1338                 :            :         }
    1339                 :            : 
    1340                 :            :         /* Do it the slow way */
    1341                 :          3 :         kaddr = kmap(page);
    1342                 :          3 :         left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
    1343                 :          3 :         kunmap(page);
    1344                 :            : 
    1345         [ +  - ]:          3 :         if (left) {
    1346                 :          3 :                 size -= left;
    1347                 :          3 :                 desc->error = -EFAULT;
    1348                 :            :         }
    1349                 :            : success:
    1350                 :   19593793 :         desc->count = count - size;
    1351                 :   19593793 :         desc->written += size;
    1352                 :   19593793 :         desc->arg.buf += size;
    1353                 :   19593793 :         return size;
    1354                 :            : }
    1355                 :            : 
    1356                 :            : /*
    1357                 :            :  * Performs necessary checks before doing a write
    1358                 :            :  * @iov:        io vector request
    1359                 :            :  * @nr_segs:    number of segments in the iovec
    1360                 :            :  * @count:      number of bytes to write
    1361                 :            :  * @access_flags: type of access: %VERIFY_READ or %VERIFY_WRITE
    1362                 :            :  *
    1363                 :            :  * Adjust number of segments and amount of bytes to write (nr_segs should be
    1364                 :            :  * properly initialized first). Returns appropriate error code that caller
    1365                 :            :  * should return or zero in case that write should be allowed.
    1366                 :            :  */
    1367                 :          0 : int generic_segment_checks(const struct iovec *iov,
    1368                 :            :                         unsigned long *nr_segs, size_t *count, int access_flags)
    1369                 :            : {
    1370                 :            :         unsigned long   seg;
    1371                 :            :         size_t cnt = 0;
    1372         [ +  + ]:   39720946 :         for (seg = 0; seg < *nr_segs; seg++) {
    1373                 :   21213024 :                 const struct iovec *iv = &iov[seg];
    1374                 :            : 
    1375                 :            :                 /*
    1376                 :            :                  * If any segment has a negative length, or the cumulative
    1377                 :            :                  * length ever wraps negative then return -EINVAL.
    1378                 :            :                  */
    1379                 :   21213024 :                 cnt += iv->iov_len;
    1380            [ + ]:   21213024 :                 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
    1381                 :            :                         return -EINVAL;
    1382         [ +  - ]:   39720946 :                 if (access_ok(access_flags, iv->iov_base, iv->iov_len))
    1383                 :   21211114 :                         continue;
    1384         [ #  # ]:          0 :                 if (seg == 0)
    1385                 :            :                         return -EFAULT;
    1386                 :          0 :                 *nr_segs = seg;
    1387                 :          0 :                 cnt -= iv->iov_len;  /* This segment is no good */
    1388                 :          0 :                 break;
    1389                 :            :         }
    1390                 :   18507922 :         *count = cnt;
    1391                 :   18507922 :         return 0;
    1392                 :            : }
    1393                 :            : EXPORT_SYMBOL(generic_segment_checks);
    1394                 :            : 
    1395                 :            : /**
    1396                 :            :  * generic_file_aio_read - generic filesystem read routine
    1397                 :            :  * @iocb:       kernel I/O control block
    1398                 :            :  * @iov:        io vector request
    1399                 :            :  * @nr_segs:    number of segments in the iovec
    1400                 :            :  * @pos:        current file position
    1401                 :            :  *
    1402                 :            :  * This is the "read()" routine for all filesystems
    1403                 :            :  * that can use the page cache directly.
    1404                 :            :  */
    1405                 :            : ssize_t
    1406                 :          0 : generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
    1407                 :            :                 unsigned long nr_segs, loff_t pos)
    1408                 :            : {
    1409                 :   14934286 :         struct file *filp = iocb->ki_filp;
    1410                 :            :         ssize_t retval;
    1411                 :            :         unsigned long seg = 0;
    1412                 :            :         size_t count;
    1413                 :   14934286 :         loff_t *ppos = &iocb->ki_pos;
    1414                 :            : 
    1415                 :   14934286 :         count = 0;
    1416                 :   14934286 :         retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
    1417         [ +  + ]:   14934291 :         if (retval)
    1418                 :            :                 return retval;
    1419                 :            : 
    1420                 :            :         /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
    1421         [ +  + ]:   14934067 :         if (filp->f_flags & O_DIRECT) {
    1422                 :            :                 loff_t size;
    1423                 :            :                 struct address_space *mapping;
    1424                 :            :                 struct inode *inode;
    1425                 :            : 
    1426                 :      31539 :                 mapping = filp->f_mapping;
    1427                 :      31539 :                 inode = mapping->host;
    1428         [ +  - ]:      31539 :                 if (!count)
    1429                 :            :                         goto out; /* skip atime */
    1430                 :            :                 size = i_size_read(inode);
    1431         [ +  + ]:      31539 :                 if (pos < size) {
    1432                 :      31533 :                         retval = filemap_write_and_wait_range(mapping, pos,
    1433                 :      31533 :                                         pos + iov_length(iov, nr_segs) - 1);
    1434         [ +  - ]:      31533 :                         if (!retval) {
    1435                 :      31533 :                                 retval = mapping->a_ops->direct_IO(READ, iocb,
    1436                 :            :                                                         iov, pos, nr_segs);
    1437                 :            :                         }
    1438            [ + ]:      31436 :                         if (retval > 0) {
    1439                 :      31525 :                                 *ppos = pos + retval;
    1440                 :      31525 :                                 count -= retval;
    1441                 :            :                         }
    1442                 :            : 
    1443                 :            :                         /*
    1444                 :            :                          * Btrfs can have a short DIO read if we encounter
    1445                 :            :                          * compressed extents, so if there was an error, or if
    1446                 :            :                          * we've already read everything we wanted to, or if
    1447                 :            :                          * there was a short read because we hit EOF, go ahead
    1448                 :            :                          * and return.  Otherwise fallthrough to buffered io for
    1449                 :            :                          * the rest of the read.
    1450                 :            :                          */
    1451    [ + ][ -  + ]:      31436 :                         if (retval < 0 || !count || *ppos >= size) {
                 [ #  # ]
    1452                 :            :                                 file_accessed(filp);
    1453                 :            :                                 goto out;
    1454                 :            :                         }
    1455                 :            :                 }
    1456                 :            :         }
    1457                 :            : 
    1458                 :   14903270 :         count = retval;
    1459         [ +  + ]:   30519345 :         for (seg = 0; seg < nr_segs; seg++) {
    1460                 :            :                 read_descriptor_t desc;
    1461                 :            :                 loff_t offset = 0;
    1462                 :            : 
    1463                 :            :                 /*
    1464                 :            :                  * If we did a short DIO read we need to skip the section of the
    1465                 :            :                  * iov that we've already read data into.
    1466                 :            :                  */
    1467         [ -  + ]:   16047706 :                 if (count) {
    1468         [ #  # ]:          0 :                         if (count > iov[seg].iov_len) {
    1469                 :          0 :                                 count -= iov[seg].iov_len;
    1470                 :         18 :                                 continue;
    1471                 :            :                         }
    1472                 :          0 :                         offset = count;
    1473                 :          0 :                         count = 0;
    1474                 :            :                 }
    1475                 :            : 
    1476                 :   16047706 :                 desc.written = 0;
    1477                 :   16047706 :                 desc.arg.buf = iov[seg].iov_base + offset;
    1478                 :   16047706 :                 desc.count = iov[seg].iov_len - offset;
    1479         [ +  + ]:   16047706 :                 if (desc.count == 0)
    1480                 :         18 :                         continue;
    1481                 :   16047688 :                 desc.error = 0;
    1482                 :   16047688 :                 do_generic_file_read(filp, ppos, &desc);
    1483                 :   16048492 :                 retval += desc.written;
    1484         [ +  + ]:   16048492 :                 if (desc.error) {
    1485         [ +  - ]:          3 :                         retval = retval ?: desc.error;
    1486                 :     432435 :                         break;
    1487                 :            :                 }
    1488         [ +  + ]:   16048489 :                 if (desc.count > 0)
    1489                 :            :                         break;
    1490                 :            :         }
    1491                 :            : out:
    1492                 :   14934774 :         return retval;
    1493                 :            : }
    1494                 :            : EXPORT_SYMBOL(generic_file_aio_read);
    1495                 :            : 
    1496                 :            : #ifdef CONFIG_MMU
    1497                 :            : /**
    1498                 :            :  * page_cache_read - adds requested page to the page cache if not already there
    1499                 :            :  * @file:       file to read
    1500                 :            :  * @offset:     page index
    1501                 :            :  *
    1502                 :            :  * This adds the requested page to the page cache if it isn't already there,
    1503                 :            :  * and schedules an I/O to read in its contents from disk.
    1504                 :            :  */
    1505                 :          0 : static int page_cache_read(struct file *file, pgoff_t offset)
    1506                 :            : {
    1507                 :         17 :         struct address_space *mapping = file->f_mapping;
    1508                 :            :         struct page *page; 
    1509                 :            :         int ret;
    1510                 :            : 
    1511                 :            :         do {
    1512                 :            :                 page = page_cache_alloc_cold(mapping);
    1513         [ +  - ]:         17 :                 if (!page)
    1514                 :            :                         return -ENOMEM;
    1515                 :            : 
    1516                 :         17 :                 ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
    1517         [ +  - ]:         17 :                 if (ret == 0)
    1518                 :         17 :                         ret = mapping->a_ops->readpage(file, page);
    1519         [ #  # ]:          0 :                 else if (ret == -EEXIST)
    1520                 :            :                         ret = 0; /* losing race to add is OK */
    1521                 :            : 
    1522                 :         17 :                 page_cache_release(page);
    1523                 :            : 
    1524         [ -  + ]:         17 :         } while (ret == AOP_TRUNCATED_PAGE);
    1525                 :            :                 
    1526                 :            :         return ret;
    1527                 :            : }
    1528                 :            : 
    1529                 :            : #define MMAP_LOTSAMISS  (100)
    1530                 :            : 
    1531                 :            : /*
    1532                 :            :  * Synchronous readahead happens when we don't even find
    1533                 :            :  * a page in the page cache at all.
    1534                 :            :  */
    1535                 :       2803 : static void do_sync_mmap_readahead(struct vm_area_struct *vma,
    1536                 :            :                                    struct file_ra_state *ra,
    1537                 :            :                                    struct file *file,
    1538                 :            :                                    pgoff_t offset)
    1539                 :            : {
    1540                 :            :         unsigned long ra_pages;
    1541                 :       2803 :         struct address_space *mapping = file->f_mapping;
    1542                 :            : 
    1543                 :            :         /* If we don't want any read-ahead, don't bother */
    1544         [ +  - ]:       2803 :         if (vma->vm_flags & VM_RAND_READ)
    1545                 :            :                 return;
    1546         [ +  - ]:       2803 :         if (!ra->ra_pages)
    1547                 :            :                 return;
    1548                 :            : 
    1549         [ -  + ]:       2803 :         if (vma->vm_flags & VM_SEQ_READ) {
    1550                 :          0 :                 page_cache_sync_readahead(mapping, ra, file, offset,
    1551                 :            :                                           ra->ra_pages);
    1552                 :            :                 return;
    1553                 :            :         }
    1554                 :            : 
    1555                 :            :         /* Avoid banging the cache line if not needed */
    1556         [ +  - ]:       2803 :         if (ra->mmap_miss < MMAP_LOTSAMISS * 10)
    1557                 :       2803 :                 ra->mmap_miss++;
    1558                 :            : 
    1559                 :            :         /*
    1560                 :            :          * Do we miss much more than hit in this file? If so,
    1561                 :            :          * stop bothering with read-ahead. It will only hurt.
    1562                 :            :          */
    1563         [ +  - ]:       2803 :         if (ra->mmap_miss > MMAP_LOTSAMISS)
    1564                 :            :                 return;
    1565                 :            : 
    1566                 :            :         /*
    1567                 :            :          * mmap read-around
    1568                 :            :          */
    1569                 :       2803 :         ra_pages = max_sane_readahead(ra->ra_pages);
    1570                 :       2803 :         ra->start = max_t(long, 0, offset - ra_pages / 2);
    1571                 :       2803 :         ra->size = ra_pages;
    1572                 :       2803 :         ra->async_size = ra_pages / 4;
    1573                 :       2803 :         ra_submit(ra, mapping, file);
    1574                 :            : }
    1575                 :            : 
    1576                 :            : /*
    1577                 :            :  * Asynchronous readahead happens when we find the page and PG_readahead,
    1578                 :            :  * so we want to possibly extend the readahead further..
    1579                 :            :  */
    1580                 :   40920414 : static void do_async_mmap_readahead(struct vm_area_struct *vma,
    1581                 :            :                                     struct file_ra_state *ra,
    1582                 :            :                                     struct file *file,
    1583                 :            :                                     struct page *page,
    1584                 :            :                                     pgoff_t offset)
    1585                 :            : {
    1586                 :   40920414 :         struct address_space *mapping = file->f_mapping;
    1587                 :            : 
    1588                 :            :         /* If we don't want any read-ahead, don't bother */
    1589         [ +  + ]:   40920414 :         if (vma->vm_flags & VM_RAND_READ)
    1590                 :          0 :                 return;
    1591         [ +  + ]:   40920278 :         if (ra->mmap_miss > 0)
    1592                 :       2765 :                 ra->mmap_miss--;
    1593         [ +  + ]:   40920278 :         if (PageReadahead(page))
    1594                 :       3604 :                 page_cache_async_readahead(mapping, ra, file,
    1595                 :       3604 :                                            page, offset, ra->ra_pages);
    1596                 :            : }
    1597                 :            : 
    1598                 :            : /**
    1599                 :            :  * filemap_fault - read in file data for page fault handling
    1600                 :            :  * @vma:        vma in which the fault was taken
    1601                 :            :  * @vmf:        struct vm_fault containing details of the fault
    1602                 :            :  *
    1603                 :            :  * filemap_fault() is invoked via the vma operations vector for a
    1604                 :            :  * mapped memory region to read in file data during a page fault.
    1605                 :            :  *
    1606                 :            :  * The goto's are kind of ugly, but this streamlines the normal case of having
    1607                 :            :  * it in the page cache, and handles the special cases reasonably without
    1608                 :            :  * having a lot of duplicated code.
    1609                 :            :  */
    1610                 :          0 : int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
    1611                 :            : {
    1612                 :            :         int error;
    1613                 :   40932586 :         struct file *file = vma->vm_file;
    1614                 :   40932586 :         struct address_space *mapping = file->f_mapping;
    1615                 :   40932586 :         struct file_ra_state *ra = &file->f_ra;
    1616                 :   40932586 :         struct inode *inode = mapping->host;
    1617                 :   40932586 :         pgoff_t offset = vmf->pgoff;
    1618                 :            :         struct page *page;
    1619                 :            :         pgoff_t size;
    1620                 :            :         int ret = 0;
    1621                 :            : 
    1622                 :   40931762 :         size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
    1623            [ + ]:   40931762 :         if (offset >= size)
    1624                 :            :                 return VM_FAULT_SIGBUS;
    1625                 :            : 
    1626                 :            :         /*
    1627                 :            :          * Do we have something in the page cache already?
    1628                 :            :          */
    1629                 :   40933011 :         page = find_get_page(mapping, offset);
    1630 [ +  + ][ +  + ]:   40932167 :         if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
    1631                 :            :                 /*
    1632                 :            :                  * We found the page, so try async readahead before
    1633                 :            :                  * waiting for the lock.
    1634                 :            :                  */
    1635                 :   40919593 :                 do_async_mmap_readahead(vma, ra, file, page, offset);
    1636         [ +  + ]:      12574 :         } else if (!page) {
    1637                 :            :                 /* No page in the page cache at all */
    1638                 :       2803 :                 do_sync_mmap_readahead(vma, ra, file, offset);
    1639                 :            :                 count_vm_event(PGMAJFAULT);
    1640                 :            :                 mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
    1641                 :            :                 ret = VM_FAULT_MAJOR;
    1642                 :            : retry_find:
    1643                 :       3534 :                 page = find_get_page(mapping, offset);
    1644         [ +  + ]:       3534 :                 if (!page)
    1645                 :            :                         goto no_cached_page;
    1646                 :            :         }
    1647                 :            : 
    1648         [ +  + ]:   40933499 :         if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
    1649                 :      11142 :                 page_cache_release(page);
    1650                 :      11142 :                 return ret | VM_FAULT_RETRY;
    1651                 :            :         }
    1652                 :            : 
    1653                 :            :         /* Did it get truncated? */
    1654         [ -  + ]:   40922357 :         if (unlikely(page->mapping != mapping)) {
    1655                 :          0 :                 unlock_page(page);
    1656                 :          0 :                 put_page(page);
    1657                 :          0 :                 goto retry_find;
    1658                 :            :         }
    1659                 :            :         VM_BUG_ON(page->index != offset);
    1660                 :            : 
    1661                 :            :         /*
    1662                 :            :          * We have a locked page in the page cache, now we need to check
    1663                 :            :          * that it's up-to-date. If not, it is going to be due to an error.
    1664                 :            :          */
    1665         [ +  + ]:   40922138 :         if (unlikely(!PageUptodate(page)))
    1666                 :            :                 goto page_not_uptodate;
    1667                 :            : 
    1668                 :            :         /*
    1669                 :            :          * Found the page and have a reference on it.
    1670                 :            :          * We must recheck i_size under page lock.
    1671                 :            :          */
    1672                 :   40921714 :         size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
    1673         [ -  + ]:   40921714 :         if (unlikely(offset >= size)) {
    1674                 :          0 :                 unlock_page(page);
    1675                 :          0 :                 page_cache_release(page);
    1676                 :          0 :                 return VM_FAULT_SIGBUS;
    1677                 :            :         }
    1678                 :            : 
    1679                 :   40921714 :         vmf->page = page;
    1680                 :   40921714 :         return ret | VM_FAULT_LOCKED;
    1681                 :            : 
    1682                 :            : no_cached_page:
    1683                 :            :         /*
    1684                 :            :          * We're only likely to ever get here if MADV_RANDOM is in
    1685                 :            :          * effect.
    1686                 :            :          */
    1687                 :         17 :         error = page_cache_read(file, offset);
    1688                 :            : 
    1689                 :            :         /*
    1690                 :            :          * The page we want has now been added to the page cache.
    1691                 :            :          * In the unlikely event that someone removed it in the
    1692                 :            :          * meantime, we'll just come back here and read it again.
    1693                 :            :          */
    1694         [ +  - ]:         17 :         if (error >= 0)
    1695                 :            :                 goto retry_find;
    1696                 :            : 
    1697                 :            :         /*
    1698                 :            :          * An error return from page_cache_read can result if the
    1699                 :            :          * system is low on memory, or a problem occurs while trying
    1700                 :            :          * to schedule I/O.
    1701                 :            :          */
    1702         [ #  # ]:          0 :         if (error == -ENOMEM)
    1703                 :            :                 return VM_FAULT_OOM;
    1704                 :          0 :         return VM_FAULT_SIGBUS;
    1705                 :            : 
    1706                 :            : page_not_uptodate:
    1707                 :            :         /*
    1708                 :            :          * Umm, take care of errors if the page isn't up-to-date.
    1709                 :            :          * Try to re-read it _once_. We do this synchronously,
    1710                 :            :          * because there really aren't any performance issues here
    1711                 :            :          * and we need to check for errors.
    1712                 :            :          */
    1713                 :            :         ClearPageError(page);
    1714                 :        714 :         error = mapping->a_ops->readpage(file, page);
    1715         [ +  - ]:        714 :         if (!error) {
    1716                 :            :                 wait_on_page_locked(page);
    1717         [ -  + ]:        714 :                 if (!PageUptodate(page))
    1718                 :            :                         error = -EIO;
    1719                 :            :         }
    1720                 :        714 :         page_cache_release(page);
    1721                 :            : 
    1722         [ +  - ]:        714 :         if (!error || error == AOP_TRUNCATED_PAGE)
    1723                 :            :                 goto retry_find;
    1724                 :            : 
    1725                 :            :         /* Things didn't work out. Return VM_FAULT_SIGBUS to tell the mm layer so. */
    1726                 :            :         shrink_readahead_size_eio(file, ra);
    1727                 :          0 :         return VM_FAULT_SIGBUS;
    1728                 :            : }
    1729                 :            : EXPORT_SYMBOL(filemap_fault);
    1730                 :            : 
    1731                 :          0 : int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
    1732                 :            : {
    1733                 :          0 :         struct page *page = vmf->page;
    1734                 :          0 :         struct inode *inode = file_inode(vma->vm_file);
    1735                 :            :         int ret = VM_FAULT_LOCKED;
    1736                 :            : 
    1737                 :          0 :         sb_start_pagefault(inode->i_sb);
    1738                 :          0 :         file_update_time(vma->vm_file);
    1739                 :            :         lock_page(page);
    1740         [ #  # ]:          0 :         if (page->mapping != inode->i_mapping) {
    1741                 :          0 :                 unlock_page(page);
    1742                 :            :                 ret = VM_FAULT_NOPAGE;
    1743                 :          0 :                 goto out;
    1744                 :            :         }
    1745                 :            :         /*
    1746                 :            :          * We mark the page dirty already here so that when freeze is in
    1747                 :            :          * progress, we are guaranteed that writeback during freezing will
    1748                 :            :          * see the dirty page and writeprotect it again.
    1749                 :            :          */
    1750                 :          0 :         set_page_dirty(page);
    1751                 :          0 :         wait_for_stable_page(page);
    1752                 :            : out:
    1753                 :          0 :         sb_end_pagefault(inode->i_sb);
    1754                 :          0 :         return ret;
    1755                 :            : }
    1756                 :            : EXPORT_SYMBOL(filemap_page_mkwrite);
    1757                 :            : 
    1758                 :            : const struct vm_operations_struct generic_file_vm_ops = {
    1759                 :            :         .fault          = filemap_fault,
    1760                 :            :         .page_mkwrite   = filemap_page_mkwrite,
    1761                 :            :         .remap_pages    = generic_file_remap_pages,
    1762                 :            : };
    1763                 :            : 
    1764                 :            : /* This is used for a general mmap of a disk file */
    1765                 :            : 
    1766                 :          0 : int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
    1767                 :            : {
    1768                 :          0 :         struct address_space *mapping = file->f_mapping;
    1769                 :            : 
    1770         [ #  # ]:          0 :         if (!mapping->a_ops->readpage)
    1771                 :            :                 return -ENOEXEC;
    1772                 :            :         file_accessed(file);
    1773                 :          0 :         vma->vm_ops = &generic_file_vm_ops;
    1774                 :          0 :         return 0;
    1775                 :            : }
    1776                 :            : 
    1777                 :            : /*
    1778                 :            :  * This is for filesystems which do not implement ->writepage.
    1779                 :            :  */
    1780                 :          0 : int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
    1781                 :            : {
    1782         [ #  # ]:          0 :         if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
    1783                 :            :                 return -EINVAL;
    1784                 :          0 :         return generic_file_mmap(file, vma);
    1785                 :            : }
    1786                 :            : #else
    1787                 :            : int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
    1788                 :            : {
    1789                 :            :         return -ENOSYS;
    1790                 :            : }
    1791                 :            : int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
    1792                 :            : {
    1793                 :            :         return -ENOSYS;
    1794                 :            : }
    1795                 :            : #endif /* CONFIG_MMU */
    1796                 :            : 
    1797                 :            : EXPORT_SYMBOL(generic_file_mmap);
    1798                 :            : EXPORT_SYMBOL(generic_file_readonly_mmap);
    1799                 :            : 
    1800                 :       5499 : static struct page *__read_cache_page(struct address_space *mapping,
    1801                 :            :                                 pgoff_t index,
    1802                 :            :                                 int (*filler)(void *, struct page *),
    1803                 :            :                                 void *data,
    1804                 :            :                                 gfp_t gfp)
    1805                 :            : {
    1806                 :            :         struct page *page;
    1807                 :            :         int err;
    1808                 :            : repeat:
    1809                 :       5499 :         page = find_get_page(mapping, index);
    1810         [ +  + ]:      10998 :         if (!page) {
    1811                 :          1 :                 page = __page_cache_alloc(gfp | __GFP_COLD);
    1812         [ +  - ]:          1 :                 if (!page)
    1813                 :            :                         return ERR_PTR(-ENOMEM);
    1814                 :          1 :                 err = add_to_page_cache_lru(page, mapping, index, gfp);
    1815         [ -  + ]:          1 :                 if (unlikely(err)) {
    1816                 :          0 :                         page_cache_release(page);
    1817         [ #  # ]:          0 :                         if (err == -EEXIST)
    1818                 :            :                                 goto repeat;
    1819                 :            :                         /* Presumably ENOMEM for radix tree node */
    1820                 :          0 :                         return ERR_PTR(err);
    1821                 :            :                 }
    1822                 :          1 :                 err = filler(data, page);
    1823         [ -  + ]:          1 :                 if (err < 0) {
    1824                 :          0 :                         page_cache_release(page);
    1825                 :            :                         page = ERR_PTR(err);
    1826                 :            :                 }
    1827                 :            :         }
    1828                 :       5499 :         return page;
    1829                 :            : }
    1830                 :            : 
    1831                 :       5499 : static struct page *do_read_cache_page(struct address_space *mapping,
    1832                 :            :                                 pgoff_t index,
    1833                 :            :                                 int (*filler)(void *, struct page *),
    1834                 :            :                                 void *data,
    1835                 :            :                                 gfp_t gfp)
    1836                 :            : 
    1837                 :            : {
    1838                 :            :         struct page *page;
    1839                 :            :         int err;
    1840                 :            : 
    1841                 :            : retry:
    1842                 :       5499 :         page = __read_cache_page(mapping, index, filler, data, gfp);
    1843         [ +  - ]:       5499 :         if (IS_ERR(page))
    1844                 :            :                 return page;
    1845         [ -  + ]:       5499 :         if (PageUptodate(page))
    1846                 :            :                 goto out;
    1847                 :            : 
    1848                 :            :         lock_page(page);
    1849         [ #  # ]:          0 :         if (!page->mapping) {
    1850                 :          0 :                 unlock_page(page);
    1851                 :          0 :                 page_cache_release(page);
    1852                 :          0 :                 goto retry;
    1853                 :            :         }
    1854         [ #  # ]:          0 :         if (PageUptodate(page)) {
    1855                 :          0 :                 unlock_page(page);
    1856                 :          0 :                 goto out;
    1857                 :            :         }
    1858                 :          0 :         err = filler(data, page);
    1859         [ #  # ]:          0 :         if (err < 0) {
    1860                 :          0 :                 page_cache_release(page);
    1861                 :          0 :                 return ERR_PTR(err);
    1862                 :            :         }
    1863                 :            : out:
    1864                 :       5499 :         mark_page_accessed(page);
    1865                 :       5499 :         return page;
    1866                 :            : }
    1867                 :            : 
    1868                 :            : /**
    1869                 :            :  * read_cache_page_async - read into page cache, fill it if needed
    1870                 :            :  * @mapping:    the page's address_space
    1871                 :            :  * @index:      the page index
    1872                 :            :  * @filler:     function to perform the read
    1873                 :            :  * @data:       first arg to filler(data, page) function, often left as NULL
    1874                 :            :  *
    1875                 :            :  * Same as read_cache_page, but don't wait for page to become unlocked
    1876                 :            :  * after submitting it to the filler.
    1877                 :            :  *
    1878                 :            :  * Read into the page cache. If a page already exists, and PageUptodate() is
    1879                 :            :  * not set, try to fill the page but don't wait for it to become unlocked.
    1880                 :            :  *
    1881                 :            :  * If the page does not get brought uptodate, return -EIO.
    1882                 :            :  */
    1883                 :          0 : struct page *read_cache_page_async(struct address_space *mapping,
    1884                 :            :                                 pgoff_t index,
    1885                 :            :                                 int (*filler)(void *, struct page *),
    1886                 :            :                                 void *data)
    1887                 :            : {
    1888                 :       5499 :         return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
    1889                 :            : }
    1890                 :            : EXPORT_SYMBOL(read_cache_page_async);
    1891                 :            : 
    1892                 :          0 : static struct page *wait_on_page_read(struct page *page)
    1893                 :            : {
    1894         [ +  - ]:       5499 :         if (!IS_ERR(page)) {
    1895                 :            :                 wait_on_page_locked(page);
    1896         [ -  + ]:       5499 :                 if (!PageUptodate(page)) {
    1897                 :          0 :                         page_cache_release(page);
    1898                 :            :                         page = ERR_PTR(-EIO);
    1899                 :            :                 }
    1900                 :            :         }
    1901                 :       5499 :         return page;
    1902                 :            : }
    1903                 :            : 
    1904                 :            : /**
    1905                 :            :  * read_cache_page_gfp - read into page cache, using specified page allocation flags.
    1906                 :            :  * @mapping:    the page's address_space
    1907                 :            :  * @index:      the page index
    1908                 :            :  * @gfp:        the page allocator flags to use if allocating
    1909                 :            :  *
    1910                 :            :  * This is the same as "read_mapping_page(mapping, index, NULL)", but with
    1911                 :            :  * any new page allocations done using the specified allocation flags.
    1912                 :            :  *
    1913                 :            :  * If the page does not get brought uptodate, return -EIO.
    1914                 :            :  */
    1915                 :          0 : struct page *read_cache_page_gfp(struct address_space *mapping,
    1916                 :            :                                 pgoff_t index,
    1917                 :            :                                 gfp_t gfp)
    1918                 :            : {
    1919                 :          0 :         filler_t *filler = (filler_t *)mapping->a_ops->readpage;
    1920                 :            : 
    1921                 :          0 :         return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp));
    1922                 :            : }
    1923                 :            : EXPORT_SYMBOL(read_cache_page_gfp);
    1924                 :            : 
    1925                 :            : /**
    1926                 :            :  * read_cache_page - read into page cache, fill it if needed
    1927                 :            :  * @mapping:    the page's address_space
    1928                 :            :  * @index:      the page index
    1929                 :            :  * @filler:     function to perform the read
    1930                 :            :  * @data:       first arg to filler(data, page) function, often left as NULL
    1931                 :            :  *
    1932                 :            :  * Read into the page cache. If a page already exists, and PageUptodate() is
    1933                 :            :  * not set, try to fill the page then wait for it to become unlocked.
    1934                 :            :  *
    1935                 :            :  * If the page does not get brought uptodate, return -EIO.
    1936                 :            :  */
    1937                 :          0 : struct page *read_cache_page(struct address_space *mapping,
    1938                 :            :                                 pgoff_t index,
    1939                 :            :                                 int (*filler)(void *, struct page *),
    1940                 :            :                                 void *data)
    1941                 :            : {
    1942                 :       5499 :         return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
    1943                 :            : }
    1944                 :            : EXPORT_SYMBOL(read_cache_page);
    1945                 :            : 
    1946                 :          0 : static size_t __iovec_copy_from_user_inatomic(char *vaddr,
    1947                 :            :                         const struct iovec *iov, size_t base, size_t bytes)
    1948                 :            : {
    1949                 :            :         size_t copied = 0, left = 0;
    1950                 :            : 
    1951         [ +  + ]:    1469524 :         while (bytes) {
    1952                 :    1332032 :                 char __user *buf = iov->iov_base + base;
    1953                 :    1332032 :                 int copy = min(bytes, iov->iov_len - base);
    1954                 :            : 
    1955                 :            :                 base = 0;
    1956                 :    1332032 :                 left = __copy_from_user_inatomic(vaddr, buf, copy);
    1957                 :    1331883 :                 copied += copy;
    1958                 :    1331883 :                 bytes -= copy;
    1959                 :    1331883 :                 vaddr += copy;
    1960                 :    1331883 :                 iov++;
    1961                 :            : 
    1962            [ + ]:    1331883 :                 if (unlikely(left))
    1963                 :            :                         break;
    1964                 :            :         }
    1965                 :          0 :         return copied - left;
    1966                 :            : }
    1967                 :            : 
    1968                 :            : /*
    1969                 :            :  * Copy as much as we can into the page and return the number of bytes which
    1970                 :            :  * were successfully copied.  If a fault is encountered then return the number
    1971                 :            :  * of bytes which were copied before the fault (i.e. a short copy).
    1972                 :            :  */
    1973                 :          0 : size_t iov_iter_copy_from_user_atomic(struct page *page,
    1974                 :            :                 struct iov_iter *i, unsigned long offset, size_t bytes)
    1975                 :            : {
    1976                 :            :         char *kaddr;
    1977                 :            :         size_t copied;
    1978                 :            : 
    1979         [ -  + ]:    6894184 :         BUG_ON(!in_atomic());
    1980                 :    6894184 :         kaddr = kmap_atomic(page);
    1981         [ +  + ]:    6894208 :         if (likely(i->nr_segs == 1)) {
    1982                 :            :                 int left;
    1983                 :    6756726 :                 char __user *buf = i->iov->iov_base + i->iov_offset;
    1984                 :    6756726 :                 left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
    1985                 :    6756705 :                 copied = bytes - left;
    1986                 :            :         } else {
    1987                 :     137482 :                 copied = __iovec_copy_from_user_inatomic(kaddr + offset,
    1988                 :            :                                                 i->iov, i->iov_offset, bytes);
    1989                 :            :         }
    1990                 :    6894207 :         kunmap_atomic(kaddr);
    1991                 :            : 
    1992                 :    6894177 :         return copied;
    1993                 :            : }
    1994                 :            : EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
    1995                 :            : 
    1996                 :            : /*
    1997                 :            :  * This has the same sideeffects and return value as
    1998                 :            :  * iov_iter_copy_from_user_atomic().
    1999                 :            :  * The difference is that it attempts to resolve faults.
    2000                 :            :  * Page must not be locked.
    2001                 :            :  */
    2002                 :          0 : size_t iov_iter_copy_from_user(struct page *page,
    2003                 :            :                 struct iov_iter *i, unsigned long offset, size_t bytes)
    2004                 :            : {
    2005                 :            :         char *kaddr;
    2006                 :            :         size_t copied;
    2007                 :            : 
    2008                 :          0 :         kaddr = kmap(page);
    2009         [ #  # ]:          0 :         if (likely(i->nr_segs == 1)) {
    2010                 :            :                 int left;
    2011                 :          0 :                 char __user *buf = i->iov->iov_base + i->iov_offset;
    2012                 :          0 :                 left = __copy_from_user(kaddr + offset, buf, bytes);
    2013                 :          0 :                 copied = bytes - left;
    2014                 :            :         } else {
    2015                 :          0 :                 copied = __iovec_copy_from_user_inatomic(kaddr + offset,
    2016                 :            :                                                 i->iov, i->iov_offset, bytes);
    2017                 :            :         }
    2018                 :          0 :         kunmap(page);
    2019                 :          0 :         return copied;
    2020                 :            : }
    2021                 :            : EXPORT_SYMBOL(iov_iter_copy_from_user);
    2022                 :            : 
    2023                 :          0 : void iov_iter_advance(struct iov_iter *i, size_t bytes)
    2024                 :            : {
    2025         [ -  + ]:   10361461 :         BUG_ON(i->count < bytes);
    2026                 :            : 
    2027         [ +  + ]:   10361461 :         if (likely(i->nr_segs == 1)) {
    2028                 :   10141212 :                 i->iov_offset += bytes;
    2029                 :   10141212 :                 i->count -= bytes;
    2030                 :            :         } else {
    2031                 :     220249 :                 const struct iovec *iov = i->iov;
    2032                 :     220249 :                 size_t base = i->iov_offset;
    2033                 :            :                 unsigned long nr_segs = i->nr_segs;
    2034                 :            : 
    2035                 :            :                 /*
    2036                 :            :                  * The !iov->iov_len check ensures we skip over unlikely
    2037                 :            :                  * zero-length segments (without overrunning the iovec).
    2038                 :            :                  */
    2039 [ +  + ][ +  + ]:    1553086 :                 while (bytes || unlikely(i->count && !iov->iov_len)) {
                    [ + ]
    2040                 :            :                         int copy;
    2041                 :            : 
    2042                 :    1332814 :                         copy = min(bytes, iov->iov_len - base);
    2043    [ +  + ][ + ]:    1332814 :                         BUG_ON(!i->count || i->count < copy);
    2044                 :    1332837 :                         i->count -= copy;
    2045                 :    1332837 :                         bytes -= copy;
    2046                 :    1332837 :                         base += copy;
    2047            [ + ]:    1332837 :                         if (iov->iov_len == base) {
    2048                 :    1332866 :                                 iov++;
    2049                 :    1332837 :                                 nr_segs--;
    2050                 :            :                                 base = 0;
    2051                 :            :                         }
    2052                 :            :                 }
    2053                 :     220272 :                 i->iov = iov;
    2054                 :     220272 :                 i->iov_offset = base;
    2055                 :     220272 :                 i->nr_segs = nr_segs;
    2056                 :            :         }
    2057                 :   10361484 : }
    2058                 :            : EXPORT_SYMBOL(iov_iter_advance);
    2059                 :            : 
    2060                 :            : /*
    2061                 :            :  * Fault in the first iovec of the given iov_iter, to a maximum length
    2062                 :            :  * of bytes. Returns 0 on success, or non-zero if the memory could not be
    2063                 :            :  * accessed (ie. because it is an invalid address).
    2064                 :            :  *
    2065                 :            :  * writev-intensive code may want this to prefault several iovecs -- that
    2066                 :            :  * would be possible (callers must not rely on the fact that _only_ the
    2067                 :            :  * first iovec will be faulted with the current implementation).
    2068                 :            :  */
    2069                 :          0 : int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
    2070                 :            : {
    2071                 :    6894232 :         char __user *buf = i->iov->iov_base + i->iov_offset;
    2072                 :    6894232 :         bytes = min(bytes, i->iov->iov_len - i->iov_offset);
    2073                 :    6894219 :         return fault_in_pages_readable(buf, bytes);
    2074                 :            : }
    2075                 :            : EXPORT_SYMBOL(iov_iter_fault_in_readable);
    2076                 :            : 
    2077                 :            : /*
    2078                 :            :  * Return the count of just the current iov_iter segment.
    2079                 :            :  */
    2080                 :          0 : size_t iov_iter_single_seg_count(const struct iov_iter *i)
    2081                 :            : {
    2082                 :          2 :         const struct iovec *iov = i->iov;
    2083 [ -  + ][ #  # ]:          2 :         if (i->nr_segs == 1)
    2084                 :          0 :                 return i->count;
    2085                 :            :         else
    2086                 :          2 :                 return min(i->count, iov->iov_len - i->iov_offset);
    2087                 :            : }
    2088                 :            : EXPORT_SYMBOL(iov_iter_single_seg_count);
    2089                 :            : 
    2090                 :            : /*
    2091                 :            :  * Performs necessary checks before doing a write
    2092                 :            :  *
    2093                 :            :  * Can adjust writing position or amount of bytes to write.
    2094                 :            :  * Returns appropriate error code that caller should return or
    2095                 :            :  * zero in case that write should be allowed.
    2096                 :            :  */
    2097                 :          0 : inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk)
    2098                 :            : {
    2099                 :    3566292 :         struct inode *inode = file->f_mapping->host;
    2100                 :            :         unsigned long limit = rlimit(RLIMIT_FSIZE);
    2101                 :            : 
    2102    [ + ][ #  # ]:    3566292 :         if (unlikely(*pos < 0))
    2103                 :            :                 return -EINVAL;
    2104                 :            : 
    2105 [ +  + ][ #  # ]:    3566359 :         if (!isblk) {
    2106                 :            :                 /* FIXME: this is for backwards compatibility with 2.4 */
    2107 [ +  + ][ #  # ]:    3566032 :                 if (file->f_flags & O_APPEND)
    2108                 :          0 :                         *pos = i_size_read(inode);
    2109                 :            : 
    2110 [ +  + ][ #  # ]:    3566148 :                 if (limit != RLIM_INFINITY) {
    2111 [ +  + ][ #  # ]:          5 :                         if (*pos >= limit) {
    2112                 :          2 :                                 send_sig(SIGXFSZ, current, 0);
    2113                 :          0 :                                 return -EFBIG;
    2114                 :            :                         }
    2115 [ +  + ][ #  # ]:          3 :                         if (*count > limit - (typeof(limit))*pos) {
    2116                 :          0 :                                 *count = limit - (typeof(limit))*pos;
    2117                 :            :                         }
    2118                 :            :                 }
    2119                 :            :         }
    2120                 :            : 
    2121                 :            :         /*
    2122                 :            :          * LFS rule
    2123                 :            :          */
    2124 [ -  + ][ #  # ]:    3566473 :         if (unlikely(*pos + *count > MAX_NON_LFS &&
         [ #  # ][ #  # ]
    2125                 :            :                                 !(file->f_flags & O_LARGEFILE))) {
    2126 [ #  # ][ #  # ]:          0 :                 if (*pos >= MAX_NON_LFS) {
    2127                 :            :                         return -EFBIG;
    2128                 :            :                 }
    2129 [ #  # ][ #  # ]:          0 :                 if (*count > MAX_NON_LFS - (unsigned long)*pos) {
    2130                 :          0 :                         *count = MAX_NON_LFS - (unsigned long)*pos;
    2131                 :            :                 }
    2132                 :            :         }
    2133                 :            : 
    2134                 :            :         /*
    2135                 :            :          * Are we about to exceed the fs block limit ?
    2136                 :            :          *
    2137                 :            :          * If we have written data it becomes a short write.  If we have
    2138                 :            :          * exceeded without writing data we send a signal and return EFBIG.
    2139                 :            :          * Linus frestrict idea will clean these up nicely..
    2140                 :            :          */
    2141 [ +  - ][ #  # ]:    3566473 :         if (likely(!isblk)) {
    2142 [ -  + ][ #  # ]:    3566473 :                 if (unlikely(*pos >= inode->i_sb->s_maxbytes)) {
    2143 [ #  # ][ #  # ]:          0 :                         if (*count || *pos > inode->i_sb->s_maxbytes) {
         [ #  # ][ #  # ]
    2144                 :            :                                 return -EFBIG;
    2145                 :            :                         }
    2146                 :            :                         /* zero-length writes at ->s_maxbytes are OK */
    2147                 :            :                 }
    2148                 :            : 
    2149 [ -  + ][ #  # ]:    3566401 :                 if (unlikely(*pos + *count > inode->i_sb->s_maxbytes))
    2150                 :          0 :                         *count = inode->i_sb->s_maxbytes - *pos;
    2151                 :            :         } else {
    2152                 :            : #ifdef CONFIG_BLOCK
    2153                 :            :                 loff_t isize;
    2154 [ #  # ][ #  # ]:          0 :                 if (bdev_read_only(I_BDEV(inode)))
    2155                 :            :                         return -EPERM;
    2156                 :            :                 isize = i_size_read(inode);
    2157 [ #  # ][ #  # ]:          0 :                 if (*pos >= isize) {
    2158 [ #  # ][ #  # ]:          0 :                         if (*count || *pos > isize)
         [ #  # ][ #  # ]
    2159                 :            :                                 return -ENOSPC;
    2160                 :            :                 }
    2161                 :            : 
    2162 [ #  # ][ #  # ]:          0 :                 if (*pos + *count > isize)
    2163                 :          0 :                         *count = isize - *pos;
    2164                 :            : #else
    2165                 :            :                 return -EPERM;
    2166                 :            : #endif
    2167                 :            :         }
    2168                 :            :         return 0;
    2169                 :            : }
    2170                 :            : EXPORT_SYMBOL(generic_write_checks);
    2171                 :            : 
    2172                 :          0 : int pagecache_write_begin(struct file *file, struct address_space *mapping,
    2173                 :            :                                 loff_t pos, unsigned len, unsigned flags,
    2174                 :            :                                 struct page **pagep, void **fsdata)
    2175                 :            : {
    2176                 :      26700 :         const struct address_space_operations *aops = mapping->a_ops;
    2177                 :            : 
    2178                 :      26700 :         return aops->write_begin(file, mapping, pos, len, flags,
    2179                 :            :                                                         pagep, fsdata);
    2180                 :            : }
    2181                 :            : EXPORT_SYMBOL(pagecache_write_begin);
    2182                 :            : 
    2183                 :          0 : int pagecache_write_end(struct file *file, struct address_space *mapping,
    2184                 :            :                                 loff_t pos, unsigned len, unsigned copied,
    2185                 :            :                                 struct page *page, void *fsdata)
    2186                 :            : {
    2187                 :      26700 :         const struct address_space_operations *aops = mapping->a_ops;
    2188                 :            : 
    2189                 :      26700 :         mark_page_accessed(page);
    2190                 :      26700 :         return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
    2191                 :            : }
    2192                 :            : EXPORT_SYMBOL(pagecache_write_end);
    2193                 :            : 
    2194                 :            : ssize_t
    2195                 :          0 : generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
    2196                 :            :                 unsigned long *nr_segs, loff_t pos, loff_t *ppos,
    2197                 :            :                 size_t count, size_t ocount)
    2198                 :            : {
    2199                 :      99111 :         struct file     *file = iocb->ki_filp;
    2200                 :      99111 :         struct address_space *mapping = file->f_mapping;
    2201                 :      99111 :         struct inode    *inode = mapping->host;
    2202                 :            :         ssize_t         written;
    2203                 :            :         size_t          write_len;
    2204                 :            :         pgoff_t         end;
    2205                 :            : 
    2206         [ -  + ]:      99111 :         if (count != ocount)
    2207                 :          0 :                 *nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count);
    2208                 :            : 
    2209                 :      99111 :         write_len = iov_length(iov, *nr_segs);
    2210                 :      99111 :         end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
    2211                 :            : 
    2212                 :      99111 :         written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
    2213         [ +  - ]:      99111 :         if (written)
    2214                 :            :                 goto out;
    2215                 :            : 
    2216                 :            :         /*
    2217                 :            :          * After a write we want buffered reads to be sure to go to disk to get
    2218                 :            :          * the new data.  We invalidate clean cached page from the region we're
    2219                 :            :          * about to write.  We do this *before* the write so that we can return
    2220                 :            :          * without clobbering -EIOCBQUEUED from ->direct_IO().
    2221                 :            :          */
    2222         [ +  + ]:      99111 :         if (mapping->nrpages) {
    2223                 :      14729 :                 written = invalidate_inode_pages2_range(mapping,
    2224                 :      14729 :                                         pos >> PAGE_CACHE_SHIFT, end);
    2225                 :            :                 /*
    2226                 :            :                  * If a page can not be invalidated, return 0 to fall back
    2227                 :            :                  * to buffered write.
    2228                 :            :                  */
    2229         [ -  + ]:      14729 :                 if (written) {
    2230         [ #  # ]:          0 :                         if (written == -EBUSY)
    2231                 :            :                                 return 0;
    2232                 :            :                         goto out;
    2233                 :            :                 }
    2234                 :            :         }
    2235                 :            : 
    2236                 :      99111 :         written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs);
    2237                 :            : 
    2238                 :            :         /*
    2239                 :            :          * Finally, try again to invalidate clean pages which might have been
    2240                 :            :          * cached by non-direct readahead, or faulted in by get_user_pages()
    2241                 :            :          * if the source of the write was an mmap'ed region of the file
    2242                 :            :          * we're writing.  Either one is a pretty crazy thing to do,
    2243                 :            :          * so we don't support it 100%.  If this invalidation
    2244                 :            :          * fails, tough, the write still worked...
    2245                 :            :          */
    2246         [ +  + ]:      99111 :         if (mapping->nrpages) {
    2247                 :         12 :                 invalidate_inode_pages2_range(mapping,
    2248                 :         12 :                                               pos >> PAGE_CACHE_SHIFT, end);
    2249                 :            :         }
    2250                 :            : 
    2251         [ +  + ]:     198222 :         if (written > 0) {
    2252                 :      99105 :                 pos += written;
    2253 [ -  + ][ #  # ]:      99105 :                 if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
    2254                 :            :                         i_size_write(inode, pos);
    2255                 :            :                         mark_inode_dirty(inode);
    2256                 :            :                 }
    2257                 :      99105 :                 *ppos = pos;
    2258                 :            :         }
    2259                 :            : out:
    2260                 :      99111 :         return written;
    2261                 :            : }
    2262                 :            : EXPORT_SYMBOL(generic_file_direct_write);
    2263                 :            : 
    2264                 :            : /*
    2265                 :            :  * Find or create a page at the given pagecache position. Return the locked
    2266                 :            :  * page. This function is specifically for buffered writes.
    2267                 :            :  */
    2268                 :          0 : struct page *grab_cache_page_write_begin(struct address_space *mapping,
    2269                 :            :                                         pgoff_t index, unsigned flags)
    2270                 :            : {
    2271                 :            :         int status;
    2272                 :            :         gfp_t gfp_mask;
    2273                 :            :         struct page *page;
    2274                 :            :         gfp_t gfp_notmask = 0;
    2275                 :            : 
    2276                 :            :         gfp_mask = mapping_gfp_mask(mapping);
    2277            [ + ]:    6919583 :         if (mapping_cap_account_dirty(mapping))
    2278                 :    6919617 :                 gfp_mask |= __GFP_WRITE;
    2279         [ +  + ]:    6919583 :         if (flags & AOP_FLAG_NOFS)
    2280                 :            :                 gfp_notmask = __GFP_FS;
    2281                 :            : repeat:
    2282                 :    6919583 :         page = find_lock_page(mapping, index);
    2283         [ +  + ]:    6919571 :         if (page)
    2284                 :            :                 goto found;
    2285                 :            : 
    2286                 :    1676226 :         page = __page_cache_alloc(gfp_mask & ~gfp_notmask);
    2287         [ +  - ]:    1676218 :         if (!page)
    2288                 :            :                 return NULL;
    2289                 :    1676218 :         status = add_to_page_cache_lru(page, mapping, index,
    2290                 :            :                                                 GFP_KERNEL & ~gfp_notmask);
    2291         [ -  + ]:    1676229 :         if (unlikely(status)) {
    2292                 :          0 :                 page_cache_release(page);
    2293         [ #  # ]:          0 :                 if (status == -EEXIST)
    2294                 :            :                         goto repeat;
    2295                 :            :                 return NULL;
    2296                 :            :         }
    2297                 :            : found:
    2298                 :    6919574 :         wait_for_stable_page(page);
    2299                 :    6919586 :         return page;
    2300                 :            : }
    2301                 :            : EXPORT_SYMBOL(grab_cache_page_write_begin);
    2302                 :            : 
    2303                 :          0 : static ssize_t generic_perform_write(struct file *file,
    2304                 :   13788221 :                                 struct iov_iter *i, loff_t pos)
    2305                 :            : {
    2306                 :   10361403 :         struct address_space *mapping = file->f_mapping;
    2307                 :    3467378 :         const struct address_space_operations *a_ops = mapping->a_ops;
    2308                 :            :         long status = 0;
    2309                 :            :         ssize_t written = 0;
    2310                 :            :         unsigned int flags = 0;
    2311                 :            : 
    2312                 :            :         /*
    2313                 :            :          * Copies from kernel address space cannot fail (NFSD is a big user).
    2314                 :            :          */
    2315         [ +  + ]:    3467378 :         if (segment_eq(get_fs(), KERNEL_DS))
    2316                 :            :                 flags |= AOP_FLAG_UNINTERRUPTIBLE;
    2317                 :            : 
    2318                 :            :         do {
    2319                 :            :                 struct page *page;
    2320                 :            :                 unsigned long offset;   /* Offset into pagecache page */
    2321                 :            :                 unsigned long bytes;    /* Bytes to write to page */
    2322                 :            :                 size_t copied;          /* Bytes copied from user */
    2323                 :            :                 void *fsdata;
    2324                 :            : 
    2325                 :    6894114 :                 offset = (pos & (PAGE_CACHE_SIZE - 1));
    2326                 :    6894114 :                 bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
    2327                 :            :                                                 iov_iter_count(i));
    2328                 :            : 
    2329                 :            : again:
    2330                 :            :                 /*
    2331                 :            :                  * Bring in the user page that we will copy from _first_.
    2332                 :            :                  * Otherwise there's a nasty deadlock on copying from the
    2333                 :            :                  * same page as we're writing to, without it being marked
    2334                 :            :                  * up-to-date.
    2335                 :            :                  *
    2336                 :            :                  * Not only is this an optimisation, but it is also required
    2337                 :            :                  * to check that the address is actually valid, when atomic
    2338                 :            :                  * usercopies are used, below.
    2339                 :            :                  */
    2340            [ + ]:    6894116 :                 if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
    2341                 :            :                         status = -EFAULT;
    2342                 :         18 :                         break;
    2343                 :            :                 }
    2344                 :            : 
    2345                 :    6894086 :                 status = a_ops->write_begin(file, mapping, pos, bytes, flags,
    2346                 :            :                                                 &page, &fsdata);
    2347         [ +  + ]:    6894105 :                 if (unlikely(status))
    2348                 :            :                         break;
    2349                 :            : 
    2350         [ +  + ]:    6894025 :                 if (mapping_writably_mapped(mapping))
    2351                 :    1860838 :                         flush_dcache_page(page);
    2352                 :            : 
    2353                 :            :                 pagefault_disable();
    2354                 :    6894052 :                 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
    2355                 :            :                 pagefault_enable();
    2356                 :    6894144 :                 flush_dcache_page(page);
    2357                 :            : 
    2358                 :    6894190 :                 mark_page_accessed(page);
    2359                 :    6894131 :                 status = a_ops->write_end(file, mapping, pos, bytes, copied,
    2360                 :            :                                                 page, fsdata);
    2361         [ +  + ]:    6894211 :                 if (unlikely(status < 0))
    2362                 :            :                         break;
    2363                 :    6894208 :                 copied = status;
    2364                 :            : 
    2365                 :    6894208 :                 cond_resched();
    2366                 :            : 
    2367                 :    6894219 :                 iov_iter_advance(i, copied);
    2368         [ +  + ]:   10361585 :                 if (unlikely(copied == 0)) {
    2369                 :            :                         /*
    2370                 :            :                          * If we were unable to copy any data at all, we must
    2371                 :            :                          * fall back to a single segment length write.
    2372                 :            :                          *
    2373                 :            :                          * If we didn't fallback here, we could livelock
    2374                 :            :                          * because not all segments in the iov can be copied at
    2375                 :            :                          * once without a pagefault.
    2376                 :            :                          */
    2377                 :          2 :                         bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
    2378                 :            :                                                 iov_iter_single_seg_count(i));
    2379                 :          2 :                         goto again;
    2380                 :            :                 }
    2381                 :    6894205 :                 pos += copied;
    2382                 :    6894205 :                 written += copied;
    2383                 :            : 
    2384                 :    6894205 :                 balance_dirty_pages_ratelimited(mapping);
    2385         [ +  + ]:    6894183 :                 if (fatal_signal_pending(current)) {
    2386                 :            :                         status = -EINTR;
    2387                 :            :                         break;
    2388                 :            :                 }
    2389         [ +  + ]:    6894107 :         } while (iov_iter_count(i));
    2390                 :            : 
    2391         [ +  + ]:    3467389 :         return written ? written : status;
    2392                 :            : }
    2393                 :            : 
    2394                 :            : ssize_t
    2395                 :          0 : generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
    2396                 :            :                 unsigned long nr_segs, loff_t pos, loff_t *ppos,
    2397                 :            :                 size_t count, ssize_t written)
    2398                 :            : {
    2399                 :    3467361 :         struct file *file = iocb->ki_filp;
    2400                 :            :         ssize_t status;
    2401                 :            :         struct iov_iter i;
    2402                 :            : 
    2403                 :    3467361 :         iov_iter_init(&i, iov, nr_segs, count, written);
    2404                 :    3467352 :         status = generic_perform_write(file, &i, pos);
    2405                 :            : 
    2406         [ +  + ]:    3467299 :         if (likely(status >= 0)) {
    2407                 :    3467269 :                 written += status;
    2408                 :    3467269 :                 *ppos = pos + status;
    2409                 :            :         }
    2410                 :            :         
    2411            [ + ]:    3467299 :         return written ? written : status;
    2412                 :            : }
    2413                 :            : EXPORT_SYMBOL(generic_file_buffered_write);
    2414                 :            : 
    2415                 :            : /**
    2416                 :            :  * __generic_file_aio_write - write data to a file
    2417                 :            :  * @iocb:       IO state structure (file, offset, etc.)
    2418                 :            :  * @iov:        vector with data to write
    2419                 :            :  * @nr_segs:    number of segments in the vector
    2420                 :            :  * @ppos:       position where to write
    2421                 :            :  *
    2422                 :            :  * This function does all the work needed for actually writing data to a
    2423                 :            :  * file. It does all basic checks, removes SUID from the file, updates
    2424                 :            :  * modification times and calls proper subroutines depending on whether we
    2425                 :            :  * do direct IO or a standard buffered write.
    2426                 :            :  *
    2427                 :            :  * It expects i_mutex to be grabbed unless we work on a block device or similar
    2428                 :            :  * object which does not need locking at all.
    2429                 :            :  *
    2430                 :            :  * This function does *not* take care of syncing data in case of O_SYNC write.
    2431                 :            :  * A caller has to handle it. This is mainly due to the fact that we want to
    2432                 :            :  * avoid syncing under i_mutex.
    2433                 :            :  */
    2434                 :          0 : ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
    2435                 :            :                                  unsigned long nr_segs, loff_t *ppos)
    2436                 :            : {
    2437                 :    3566501 :         struct file *file = iocb->ki_filp;
    2438                 :    3566501 :         struct address_space * mapping = file->f_mapping;
    2439                 :            :         size_t ocount;          /* original count */
    2440                 :            :         size_t count;           /* after file limit checks */
    2441                 :    3566501 :         struct inode    *inode = mapping->host;
    2442                 :            :         loff_t          pos;
    2443                 :            :         ssize_t         written;
    2444                 :            :         ssize_t         err;
    2445                 :            : 
    2446                 :    3566501 :         ocount = 0;
    2447                 :    3566501 :         err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
    2448         [ +  + ]:    3566449 :         if (err)
    2449                 :            :                 return err;
    2450                 :            : 
    2451                 :    3566292 :         count = ocount;
    2452                 :    3566292 :         pos = *ppos;
    2453                 :            : 
    2454                 :            :         /* We can write back this queue in page reclaim */
    2455                 :    3566292 :         current->backing_dev_info = mapping->backing_dev_info;
    2456                 :            :         written = 0;
    2457                 :            : 
    2458                 :    3566292 :         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
    2459            [ + ]:    7132952 :         if (err)
    2460                 :            :                 goto out;
    2461                 :            : 
    2462         [ +  + ]:    3566464 :         if (count == 0)
    2463                 :            :                 goto out;
    2464                 :            : 
    2465                 :    3566329 :         err = file_remove_suid(file);
    2466            [ + ]:    3566318 :         if (err)
    2467                 :            :                 goto out;
    2468                 :            : 
    2469                 :    3566479 :         err = file_update_time(file);
    2470         [ +  + ]:    3566463 :         if (err)
    2471                 :            :                 goto out;
    2472                 :            : 
    2473                 :            :         /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
    2474         [ +  + ]:    3566452 :         if (unlikely(file->f_flags & O_DIRECT)) {
    2475                 :            :                 loff_t endbyte;
    2476                 :            :                 ssize_t written_buffered;
    2477                 :            : 
    2478                 :      99111 :                 written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
    2479                 :            :                                                         ppos, count, ocount);
    2480 [ +  + ][ -  + ]:      99111 :                 if (written < 0 || written == count)
    2481                 :            :                         goto out;
    2482                 :            :                 /*
    2483                 :            :                  * direct-io write to a hole: fall through to buffered I/O
    2484                 :            :                  * for completing the rest of the request.
    2485                 :            :                  */
    2486                 :          0 :                 pos += written;
    2487                 :          0 :                 count -= written;
    2488                 :          0 :                 written_buffered = generic_file_buffered_write(iocb, iov,
    2489                 :            :                                                 nr_segs, pos, ppos, count,
    2490                 :            :                                                 written);
    2491                 :            :                 /*
    2492                 :            :                  * If generic_file_buffered_write() retuned a synchronous error
    2493                 :            :                  * then we want to return the number of bytes which were
    2494                 :            :                  * direct-written, or the error code if that was zero.  Note
    2495                 :            :                  * that this differs from normal direct-io semantics, which
    2496                 :            :                  * will return -EFOO even if some bytes were written.
    2497                 :            :                  */
    2498         [ #  # ]:          0 :                 if (written_buffered < 0) {
    2499                 :            :                         err = written_buffered;
    2500                 :            :                         goto out;
    2501                 :            :                 }
    2502                 :            : 
    2503                 :            :                 /*
    2504                 :            :                  * We need to ensure that the page cache pages are written to
    2505                 :            :                  * disk and invalidated to preserve the expected O_DIRECT
    2506                 :            :                  * semantics.
    2507                 :            :                  */
    2508                 :          0 :                 endbyte = pos + written_buffered - written - 1;
    2509                 :          0 :                 err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
    2510         [ #  # ]:          0 :                 if (err == 0) {
    2511                 :            :                         written = written_buffered;
    2512                 :          0 :                         invalidate_mapping_pages(mapping,
    2513                 :          0 :                                                  pos >> PAGE_CACHE_SHIFT,
    2514                 :          0 :                                                  endbyte >> PAGE_CACHE_SHIFT);
    2515                 :            :                 } else {
    2516                 :            :                         /*
    2517                 :            :                          * We don't know how much we wrote, so just return
    2518                 :            :                          * the number of bytes which were direct-written
    2519                 :            :                          */
    2520                 :            :                 }
    2521                 :            :         } else {
    2522                 :    3467341 :                 written = generic_file_buffered_write(iocb, iov, nr_segs,
    2523                 :            :                                 pos, ppos, count, written);
    2524                 :            :         }
    2525                 :            : out:
    2526                 :    3566430 :         current->backing_dev_info = NULL;
    2527         [ +  + ]:    3566430 :         return written ? written : err;
    2528                 :            : }
    2529                 :            : EXPORT_SYMBOL(__generic_file_aio_write);
    2530                 :            : 
    2531                 :            : /**
    2532                 :            :  * generic_file_aio_write - write data to a file
    2533                 :            :  * @iocb:       IO state structure
    2534                 :            :  * @iov:        vector with data to write
    2535                 :            :  * @nr_segs:    number of segments in the vector
    2536                 :            :  * @pos:        position in file where to write
    2537                 :            :  *
    2538                 :            :  * This is a wrapper around __generic_file_aio_write() to be used by most
    2539                 :            :  * filesystems. It takes care of syncing the file in case of O_SYNC file
    2540                 :            :  * and acquires i_mutex as needed.
    2541                 :            :  */
    2542                 :          0 : ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
    2543                 :            :                 unsigned long nr_segs, loff_t pos)
    2544                 :            : {
    2545                 :    3467306 :         struct file *file = iocb->ki_filp;
    2546                 :    3467306 :         struct inode *inode = file->f_mapping->host; /* host inode of the file mapping; its i_mutex is taken below */
    2547                 :            :         ssize_t ret;
    2548                 :            : 
    2549         [ -  + ]:    3467306 :         BUG_ON(iocb->ki_pos != pos); /* precondition: pos must equal iocb->ki_pos on entry */
    2550                 :            : 
    2551                 :    3467306 :         mutex_lock(&inode->i_mutex); /* i_mutex is held only across the inner write call */
    2552                 :    3467172 :         ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); /* inner call receives &iocb->ki_pos, so it may change ki_pos */
    2553                 :    3467249 :         mutex_unlock(&inode->i_mutex); /* dropped before the sync step below */
    2554                 :            : 
    2555         [ +  + ]:    3467361 :         if (ret > 0) { /* sync only on a positive result; zero and negative values are returned unchanged */
    2556                 :            :                 ssize_t err;
    2557                 :            : 
    2558                 :    3467238 :                 err = generic_write_sync(file, pos, ret); /* uses the by-value pos from entry, not the possibly updated ki_pos */
    2559         [ -  + ]:    3467148 :                 if (err < 0 && ret > 0) /* NOTE(review): ret > 0 is already guaranteed by the enclosing if; the second test is redundant */
    2560                 :            :                         ret = err; /* a sync failure replaces the positive result with the error code */
    2561                 :            :         }
    2562                 :          0 :         return ret; /* positive result, or a value <= 0 from the inner write, or the sync error */
    2563                 :            : }
    2564                 :            : EXPORT_SYMBOL(generic_file_aio_write);
    2565                 :            : 
    2566                 :            : /**
    2567                 :            :  * try_to_release_page() - release old fs-specific metadata on a page
    2568                 :            :  *
    2569                 :            :  * @page: the page which the kernel is trying to free
    2570                 :            :  * @gfp_mask: memory allocation flags (and I/O mode)
    2571                 :            :  *
    2572                 :            :  * The address_space is to try to release any data against the page
    2573                 :            :  * (presumably at page->private).  If the release was successful, return `1'.
    2574                 :            :  * Otherwise return zero.
    2575                 :            :  *
    2576                 :            :  * This may also be called if PG_fscache is set on a page, indicating that the
    2577                 :            :  * page is known to the local caching routines.
    2578                 :            :  *
    2579                 :            :  * The @gfp_mask argument specifies whether I/O may be performed to release
    2580                 :            :  * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
    2581                 :            :  *
    2582                 :            :  */
    2583                 :          0 : int try_to_release_page(struct page *page, gfp_t gfp_mask)
    2584                 :            : {
    2585                 :    1923015 :         struct address_space * const mapping = page->mapping; /* may be NULL; checked before use below */
    2586                 :            : 
    2587         [ -  + ]:    1923015 :         BUG_ON(!PageLocked(page)); /* precondition: the page must be locked on entry */
    2588            [ + ]:    1923015 :         if (PageWriteback(page)) /* page is under writeback: report failure without attempting a release */
    2589                 :            :                 return 0;
    2590                 :            : 
    2591 [ +  - ][ +  - ]:    1923017 :         if (mapping && mapping->a_ops->releasepage) /* use the mapping releasepage hook when both exist */
    2592                 :    1923017 :                 return mapping->a_ops->releasepage(page, gfp_mask); /* the hook result is returned as is */
    2593                 :          0 :         return try_to_free_buffers(page); /* no mapping or no hook: fall back; gfp_mask is not passed on this path */
    2594                 :            : }
    2595                 :            : 
    2596                 :            : EXPORT_SYMBOL(try_to_release_page);

Generated by: LCOV version 1.9