LCOV - coverage.info - mm/swapfile.c

LCOV - code coverage report

Current view:	top level - mm - swapfile.c (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	268	924	29.0 %
Date:	2014-02-18	Functions:	21	62	33.9 %
		Branches:	150	687	21.8 %

           Branch data     Line data    Source code

       1                 :            : /*
       2                 :            :  *  linux/mm/swapfile.c
       3                 :            :  *
       4                 :            :  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
       5                 :            :  *  Swap reorganised 29.12.95, Stephen Tweedie
       6                 :            :  */
       7                 :            : 
       8                 :            : #include <linux/mm.h>
       9                 :            : #include <linux/hugetlb.h>
      10                 :            : #include <linux/mman.h>
      11                 :            : #include <linux/slab.h>
      12                 :            : #include <linux/kernel_stat.h>
      13                 :            : #include <linux/swap.h>
      14                 :            : #include <linux/vmalloc.h>
      15                 :            : #include <linux/pagemap.h>
      16                 :            : #include <linux/namei.h>
      17                 :            : #include <linux/shmem_fs.h>
      18                 :            : #include <linux/blkdev.h>
      19                 :            : #include <linux/random.h>
      20                 :            : #include <linux/writeback.h>
      21                 :            : #include <linux/proc_fs.h>
      22                 :            : #include <linux/seq_file.h>
      23                 :            : #include <linux/init.h>
      24                 :            : #include <linux/ksm.h>
      25                 :            : #include <linux/rmap.h>
      26                 :            : #include <linux/security.h>
      27                 :            : #include <linux/backing-dev.h>
      28                 :            : #include <linux/mutex.h>
      29                 :            : #include <linux/capability.h>
      30                 :            : #include <linux/syscalls.h>
      31                 :            : #include <linux/memcontrol.h>
      32                 :            : #include <linux/poll.h>
      33                 :            : #include <linux/oom.h>
      34                 :            : #include <linux/frontswap.h>
      35                 :            : #include <linux/swapfile.h>
      36                 :            : #include <linux/export.h>
      37                 :            : 
      38                 :            : #include <asm/pgtable.h>
      39                 :            : #include <asm/tlbflush.h>
      40                 :            : #include <linux/swapops.h>
      41                 :            : #include <linux/page_cgroup.h>
      42                 :            : 
      43                 :            : static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
      44                 :            :                                  unsigned char);
      45                 :            : static void free_swap_count_continuations(struct swap_info_struct *);
      46                 :            : static sector_t map_swap_entry(swp_entry_t, struct block_device**);
      47                 :            : 
      48                 :            : DEFINE_SPINLOCK(swap_lock);
      49                 :            : static unsigned int nr_swapfiles;
      50                 :            : atomic_long_t nr_swap_pages;
      51                 :            : /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
      52                 :            : long total_swap_pages;
      53                 :            : static int least_priority;
      54                 :            : static atomic_t highest_priority_index = ATOMIC_INIT(-1);
      55                 :            : 
      56                 :            : static const char Bad_file[] = "Bad swap file entry ";
      57                 :            : static const char Unused_file[] = "Unused swap file entry ";
      58                 :            : static const char Bad_offset[] = "Bad swap offset entry ";
      59                 :            : static const char Unused_offset[] = "Unused swap offset entry ";
      60                 :            : 
      61                 :            : struct swap_list_t swap_list = {-1, -1};
      62                 :            : 
      63                 :            : struct swap_info_struct *swap_info[MAX_SWAPFILES];
      64                 :            : 
      65                 :            : static DEFINE_MUTEX(swapon_mutex);
      66                 :            : 
      67                 :            : static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait);
      68                 :            : /* Activity counter to indicate that a swapon or swapoff has occurred */
      69                 :            : static atomic_t proc_poll_event = ATOMIC_INIT(0);
      70                 :            : 
      71                 :            : static inline unsigned char swap_count(unsigned char ent)
      72                 :            : {
      73                 :          0 :         return ent & ~SWAP_HAS_CACHE;       /* may include SWAP_HAS_CONT flag */
      74                 :            : }
      75                 :            : 
      76                 :            : /* returns 1 if swap entry is freed */
      77                 :            : static int
      78                 :          0 : __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
      79                 :            : {
      80                 :          0 :         swp_entry_t entry = swp_entry(si->type, offset);
      81                 :            :         struct page *page;
      82                 :            :         int ret = 0;
      83                 :            : 
      84                 :          0 :         page = find_get_page(swap_address_space(entry), entry.val);
      85         [ #  # ]:          0 :         if (!page)
      86                 :            :                 return 0;
      87                 :            :         /*
      88                 :            :          * This function is called from scan_swap_map() and it's called
      89                 :            :          * by vmscan.c at reclaiming pages. So, we hold a lock on a page, here.
      90                 :            :          * We have to use trylock for avoiding deadlock. This is a special
      91                 :            :          * case and you should use try_to_free_swap() with explicit lock_page()
      92                 :            :          * in usual operations.
      93                 :            :          */
      94         [ #  # ]:          0 :         if (trylock_page(page)) {
      95                 :          0 :                 ret = try_to_free_swap(page);
      96                 :          0 :                 unlock_page(page);
      97                 :            :         }
      98                 :          0 :         page_cache_release(page);
      99                 :            :         return ret;
     100                 :            : }
     101                 :            : 
     102                 :            : /*
     103                 :            :  * swapon tell device that all the old swap contents can be discarded,
     104                 :            :  * to allow the swap device to optimize its wear-levelling.
     105                 :            :  */
     106                 :          0 : static int discard_swap(struct swap_info_struct *si)
     107                 :            : {
     108                 :            :         struct swap_extent *se;
     109                 :            :         sector_t start_block;
     110                 :            :         sector_t nr_blocks;
     111                 :            :         int err = 0;
     112                 :            : 
     113                 :            :         /* Do not discard the swap header page! */
     114                 :            :         se = &si->first_swap_extent;
     115                 :          0 :         start_block = (se->start_block + 1) << (PAGE_SHIFT - 9);
     116                 :          0 :         nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
     117         [ #  # ]:          0 :         if (nr_blocks) {
     118                 :          0 :                 err = blkdev_issue_discard(si->bdev, start_block,
     119                 :            :                                 nr_blocks, GFP_KERNEL, 0);
     120         [ #  # ]:          0 :                 if (err)
     121                 :            :                         return err;
     122                 :          0 :                 cond_resched();
     123                 :            :         }
     124                 :            : 
     125         [ #  # ]:          0 :         list_for_each_entry(se, &si->first_swap_extent.list, list) {
     126                 :          0 :                 start_block = se->start_block << (PAGE_SHIFT - 9);
     127                 :          0 :                 nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
     128                 :            : 
     129                 :          0 :                 err = blkdev_issue_discard(si->bdev, start_block,
     130                 :            :                                 nr_blocks, GFP_KERNEL, 0);
     131         [ #  # ]:          0 :                 if (err)
     132                 :            :                         break;
     133                 :            : 
     134                 :          0 :                 cond_resched();
     135                 :            :         }
     136                 :          0 :         return err;             /* That will often be -EOPNOTSUPP */
     137                 :            : }
     138                 :            : 
     139                 :            : /*
     140                 :            :  * swap allocation tell device that a cluster of swap can now be discarded,
     141                 :            :  * to allow the swap device to optimize its wear-levelling.
     142                 :            :  */
     143                 :          0 : static void discard_swap_cluster(struct swap_info_struct *si,
     144                 :            :                                  pgoff_t start_page, pgoff_t nr_pages)
     145                 :            : {
     146                 :          0 :         struct swap_extent *se = si->curr_swap_extent;
     147                 :            :         int found_extent = 0;
     148                 :            : 
     149         [ #  # ]:          0 :         while (nr_pages) {
     150                 :            :                 struct list_head *lh;
     151                 :            : 
     152 [ #  # ][ #  # ]:          0 :                 if (se->start_page <= start_page &&
     153                 :          0 :                     start_page < se->start_page + se->nr_pages) {
     154                 :          0 :                         pgoff_t offset = start_page - se->start_page;
     155                 :          0 :                         sector_t start_block = se->start_block + offset;
     156                 :          0 :                         sector_t nr_blocks = se->nr_pages - offset;
     157                 :            : 
     158         [ #  # ]:          0 :                         if (nr_blocks > nr_pages)
     159                 :            :                                 nr_blocks = nr_pages;
     160                 :          0 :                         start_page += nr_blocks;
     161                 :          0 :                         nr_pages -= nr_blocks;
     162                 :            : 
     163         [ #  # ]:          0 :                         if (!found_extent++)
     164                 :          0 :                                 si->curr_swap_extent = se;
     165                 :            : 
     166                 :          0 :                         start_block <<= PAGE_SHIFT - 9;
     167                 :          0 :                         nr_blocks <<= PAGE_SHIFT - 9;
     168         [ #  # ]:          0 :                         if (blkdev_issue_discard(si->bdev, start_block,
     169                 :            :                                     nr_blocks, GFP_NOIO, 0))
     170                 :            :                                 break;
     171                 :            :                 }
     172                 :            : 
     173                 :          0 :                 lh = se->list.next;
     174                 :            :                 se = list_entry(lh, struct swap_extent, list);
     175                 :            :         }
     176                 :          0 : }
     177                 :            : 
     178                 :            : #define SWAPFILE_CLUSTER        256
     179                 :            : #define LATENCY_LIMIT           256
     180                 :            : 
     181                 :            : static inline void cluster_set_flag(struct swap_cluster_info *info,
     182                 :            :         unsigned int flag)
     183                 :            : {
     184                 :          0 :         info->flags = flag;
     185                 :            : }
     186                 :            : 
     187                 :            : static inline unsigned int cluster_count(struct swap_cluster_info *info)
     188                 :            : {
     189                 :          0 :         return info->data;
     190                 :            : }
     191                 :            : 
     192                 :            : static inline void cluster_set_count(struct swap_cluster_info *info,
     193                 :            :                                      unsigned int c)
     194                 :            : {
     195                 :          0 :         info->data = c;
     196                 :            : }
     197                 :            : 
     198                 :            : static inline void cluster_set_count_flag(struct swap_cluster_info *info,
     199                 :            :                                          unsigned int c, unsigned int f)
     200                 :            : {
     201                 :          0 :         info->flags = f;
     202                 :          0 :         info->data = c;
     203                 :            : }
     204                 :            : 
     205                 :            : static inline unsigned int cluster_next(struct swap_cluster_info *info)
     206                 :            : {
     207                 :          0 :         return info->data;
     208                 :            : }
     209                 :            : 
     210                 :            : static inline void cluster_set_next(struct swap_cluster_info *info,
     211                 :            :                                     unsigned int n)
     212                 :            : {
     213                 :          0 :         info->data = n;
     214                 :            : }
     215                 :            : 
     216                 :            : static inline void cluster_set_next_flag(struct swap_cluster_info *info,
     217                 :            :                                          unsigned int n, unsigned int f)
     218                 :            : {
     219                 :          0 :         info->flags = f;
     220                 :          0 :         info->data = n;
     221                 :            : }
     222                 :            : 
     223                 :            : static inline bool cluster_is_free(struct swap_cluster_info *info)
     224                 :            : {
     225                 :          0 :         return info->flags & CLUSTER_FLAG_FREE;
     226                 :            : }
     227                 :            : 
     228                 :            : static inline bool cluster_is_null(struct swap_cluster_info *info)
     229                 :            : {
     230                 :          0 :         return info->flags & CLUSTER_FLAG_NEXT_NULL;
     231                 :            : }
     232                 :            : 
     233                 :            : static inline void cluster_set_null(struct swap_cluster_info *info)
     234                 :            : {
     235                 :        136 :         info->flags = CLUSTER_FLAG_NEXT_NULL;
     236                 :          0 :         info->data = 0;
     237                 :            : }
     238                 :            : 
     239                 :            : /* Add a cluster to discard list and schedule it to do discard */
     240                 :          0 : static void swap_cluster_schedule_discard(struct swap_info_struct *si,
     241                 :            :                 unsigned int idx)
     242                 :            : {
     243                 :            :         /*
     244                 :            :          * If scan_swap_map() can't find a free cluster, it will check
     245                 :            :          * si->swap_map directly. To make sure the discarding cluster isn't
     246                 :            :          * taken by scan_swap_map(), mark the swap entries bad (occupied). It
     247                 :            :          * will be cleared after discard
     248                 :            :          */
     249                 :          0 :         memset(si->swap_map + idx * SWAPFILE_CLUSTER,
     250                 :            :                         SWAP_MAP_BAD, SWAPFILE_CLUSTER);
     251                 :            : 
     252         [ #  # ]:          0 :         if (cluster_is_null(&si->discard_cluster_head)) {
     253                 :            :                 cluster_set_next_flag(&si->discard_cluster_head,
     254                 :            :                                                 idx, 0);
     255                 :            :                 cluster_set_next_flag(&si->discard_cluster_tail,
     256                 :            :                                                 idx, 0);
     257                 :            :         } else {
     258                 :            :                 unsigned int tail = cluster_next(&si->discard_cluster_tail);
     259                 :          0 :                 cluster_set_next(&si->cluster_info[tail], idx);
     260                 :            :                 cluster_set_next_flag(&si->discard_cluster_tail,
     261                 :            :                                                 idx, 0);
     262                 :            :         }
     263                 :            : 
     264                 :          0 :         schedule_work(&si->discard_work);
     265                 :          0 : }
     266                 :            : 
     267                 :            : /*
     268                 :            :  * Doing discard actually. After a cluster discard is finished, the cluster
     269                 :            :  * will be added to free cluster list. caller should hold si->lock.
     270                 :            : */
     271                 :          0 : static void swap_do_scheduled_discard(struct swap_info_struct *si)
     272                 :            : {
     273                 :            :         struct swap_cluster_info *info;
     274                 :            :         unsigned int idx;
     275                 :            : 
     276                 :          0 :         info = si->cluster_info;
     277                 :            : 
     278         [ #  # ]:          0 :         while (!cluster_is_null(&si->discard_cluster_head)) {
     279                 :            :                 idx = cluster_next(&si->discard_cluster_head);
     280                 :            : 
     281                 :          0 :                 cluster_set_next_flag(&si->discard_cluster_head,
     282                 :          0 :                                                 cluster_next(&info[idx]), 0);
     283         [ #  # ]:          0 :                 if (cluster_next(&si->discard_cluster_tail) == idx) {
     284                 :            :                         cluster_set_null(&si->discard_cluster_head);
     285                 :            :                         cluster_set_null(&si->discard_cluster_tail);
     286                 :            :                 }
     287                 :            :                 spin_unlock(&si->lock);
     288                 :            : 
     289                 :          0 :                 discard_swap_cluster(si, idx * SWAPFILE_CLUSTER,
     290                 :            :                                 SWAPFILE_CLUSTER);
     291                 :            : 
     292                 :            :                 spin_lock(&si->lock);
     293                 :            :                 cluster_set_flag(&info[idx], CLUSTER_FLAG_FREE);
     294         [ #  # ]:          0 :                 if (cluster_is_null(&si->free_cluster_head)) {
     295                 :            :                         cluster_set_next_flag(&si->free_cluster_head,
     296                 :            :                                                 idx, 0);
     297                 :            :                         cluster_set_next_flag(&si->free_cluster_tail,
     298                 :            :                                                 idx, 0);
     299                 :            :                 } else {
     300                 :            :                         unsigned int tail;
     301                 :            : 
     302                 :            :                         tail = cluster_next(&si->free_cluster_tail);
     303                 :          0 :                         cluster_set_next(&info[tail], idx);
     304                 :            :                         cluster_set_next_flag(&si->free_cluster_tail,
     305                 :            :                                                 idx, 0);
     306                 :            :                 }
     307                 :          0 :                 memset(si->swap_map + idx * SWAPFILE_CLUSTER,
     308                 :            :                                 0, SWAPFILE_CLUSTER);
     309                 :            :         }
     310                 :          0 : }
     311                 :            : 
     312                 :          0 : static void swap_discard_work(struct work_struct *work)
     313                 :            : {
     314                 :            :         struct swap_info_struct *si;
     315                 :            : 
     316                 :          0 :         si = container_of(work, struct swap_info_struct, discard_work);
     317                 :            : 
     318                 :            :         spin_lock(&si->lock);
     319                 :          0 :         swap_do_scheduled_discard(si);
     320                 :            :         spin_unlock(&si->lock);
     321                 :          0 : }
     322                 :            : 
     323                 :            : /*
     324                 :            :  * The cluster corresponding to page_nr will be used. The cluster will be
     325                 :            :  * removed from free cluster list and its usage counter will be increased.
     326                 :            :  */
     327                 :          0 : static void inc_cluster_info_page(struct swap_info_struct *p,
     328                 :            :         struct swap_cluster_info *cluster_info, unsigned long page_nr)
     329                 :            : {
     330                 :       7906 :         unsigned long idx = page_nr / SWAPFILE_CLUSTER;
     331                 :            : 
     332         [ +  - ]:       7906 :         if (!cluster_info)
     333                 :          0 :                 return;
     334         [ #  # ]:       7906 :         if (cluster_is_free(&cluster_info[idx])) {
     335                 :            :                 VM_BUG_ON(cluster_next(&p->free_cluster_head) != idx);
     336                 :            :                 cluster_set_next_flag(&p->free_cluster_head,
     337                 :            :                         cluster_next(&cluster_info[idx]), 0);
     338         [ #  # ]:          0 :                 if (cluster_next(&p->free_cluster_tail) == idx) {
     339                 :            :                         cluster_set_null(&p->free_cluster_tail);
     340                 :            :                         cluster_set_null(&p->free_cluster_head);
     341                 :            :                 }
     342                 :            :                 cluster_set_count_flag(&cluster_info[idx], 0, 0);
     343                 :            :         }
     344                 :            : 
     345                 :            :         VM_BUG_ON(cluster_count(&cluster_info[idx]) >= SWAPFILE_CLUSTER);
     346                 :          0 :         cluster_set_count(&cluster_info[idx],
     347                 :            :                 cluster_count(&cluster_info[idx]) + 1);
     348                 :            : }
     349                 :            : 
     350                 :            : /*
     351                 :            :  * The cluster corresponding to page_nr decreases one usage. If the usage
     352                 :            :  * counter becomes 0, which means no page in the cluster is in using, we can
     353                 :            :  * optionally discard the cluster and add it to free cluster list.
     354                 :            :  */
     355                 :          0 : static void dec_cluster_info_page(struct swap_info_struct *p,
     356                 :            :         struct swap_cluster_info *cluster_info, unsigned long page_nr)
     357                 :            : {
     358                 :          0 :         unsigned long idx = page_nr / SWAPFILE_CLUSTER;
     359                 :            : 
     360         [ #  # ]:          0 :         if (!cluster_info)
     361                 :            :                 return;
     362                 :            : 
     363                 :            :         VM_BUG_ON(cluster_count(&cluster_info[idx]) == 0);
     364                 :          0 :         cluster_set_count(&cluster_info[idx],
     365                 :          0 :                 cluster_count(&cluster_info[idx]) - 1);
     366                 :            : 
     367         [ #  # ]:          0 :         if (cluster_count(&cluster_info[idx]) == 0) {
     368                 :            :                 /*
     369                 :            :                  * If the swap is discardable, prepare discard the cluster
     370                 :            :                  * instead of free it immediately. The cluster will be freed
     371                 :            :                  * after discard.
     372                 :            :                  */
     373         [ #  # ]:          0 :                 if ((p->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) ==
     374                 :            :                                  (SWP_WRITEOK | SWP_PAGE_DISCARD)) {
     375                 :          0 :                         swap_cluster_schedule_discard(p, idx);
     376                 :          0 :                         return;
     377                 :            :                 }
     378                 :            : 
     379                 :            :                 cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
     380         [ #  # ]:          0 :                 if (cluster_is_null(&p->free_cluster_head)) {
     381                 :            :                         cluster_set_next_flag(&p->free_cluster_head, idx, 0);
     382                 :            :                         cluster_set_next_flag(&p->free_cluster_tail, idx, 0);
     383                 :            :                 } else {
     384                 :            :                         unsigned int tail = cluster_next(&p->free_cluster_tail);
     385                 :          0 :                         cluster_set_next(&cluster_info[tail], idx);
     386                 :            :                         cluster_set_next_flag(&p->free_cluster_tail, idx, 0);
     387                 :            :                 }
     388                 :            :         }
     389                 :            : }
     390                 :            : 
     391                 :            : /*
     392                 :            :  * It's possible scan_swap_map() uses a free cluster in the middle of free
     393                 :            :  * cluster list. Avoiding such abuse to avoid list corruption.
     394                 :            :  */
     395                 :            : static bool
     396                 :          0 : scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
     397                 :            :         unsigned long offset)
     398                 :            : {
     399                 :            :         struct percpu_cluster *percpu_cluster;
     400                 :            :         bool conflict;
     401                 :            : 
     402                 :          0 :         offset /= SWAPFILE_CLUSTER;
     403         [ #  # ]:          0 :         conflict = !cluster_is_null(&si->free_cluster_head) &&
     404 [ #  # ][ #  # ]:          0 :                 offset != cluster_next(&si->free_cluster_head) &&
     405                 :          0 :                 cluster_is_free(&si->cluster_info[offset]);
     406                 :            : 
     407         [ #  # ]:          0 :         if (!conflict)
     408                 :            :                 return false;
     409                 :            : 
     410                 :          0 :         percpu_cluster = this_cpu_ptr(si->percpu_cluster);
     411                 :            :         cluster_set_null(&percpu_cluster->index);
     412                 :          0 :         return true;
     413                 :            : }
     414                 :            : 
     415                 :            : /*
     416                 :            :  * Try to get a swap entry from current cpu's swap entry pool (a cluster). This
     417                 :            :  * might involve allocating a new cluster for current CPU too.
     418                 :            :  */
     419                 :          0 : static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
     420                 :            :         unsigned long *offset, unsigned long *scan_base)
     421                 :            : {
     422                 :            :         struct percpu_cluster *cluster;
     423                 :            :         bool found_free;
     424                 :            :         unsigned long tmp;
     425                 :            : 
     426                 :            : new_cluster:
     427                 :          0 :         cluster = this_cpu_ptr(si->percpu_cluster);
     428         [ #  # ]:          0 :         if (cluster_is_null(&cluster->index)) {
     429         [ #  # ]:          0 :                 if (!cluster_is_null(&si->free_cluster_head)) {
     430                 :          0 :                         cluster->index = si->free_cluster_head;
     431                 :          0 :                         cluster->next = cluster_next(&cluster->index) *
     432                 :            :                                         SWAPFILE_CLUSTER;
     433         [ #  # ]:          0 :                 } else if (!cluster_is_null(&si->discard_cluster_head)) {
     434                 :            :                         /*
     435                 :            :                          * we don't have free cluster but have some clusters in
     436                 :            :                          * discarding, do discard now and reclaim them
     437                 :            :                          */
     438                 :          0 :                         swap_do_scheduled_discard(si);
     439                 :          0 :                         *scan_base = *offset = si->cluster_next;
     440                 :          0 :                         goto new_cluster;
     441                 :            :                 } else
     442                 :          0 :                         return;
     443                 :            :         }
     444                 :            : 
     445                 :            :         found_free = false;
     446                 :            : 
     447                 :            :         /*
     448                 :            :          * Other CPUs can use our cluster if they can't find a free cluster,
     449                 :            :          * check if there is still free entry in the cluster
     450                 :            :          */
     451                 :          0 :         tmp = cluster->next;
     452 [ #  # ][ #  # ]:          0 :         while (tmp < si->max && tmp < (cluster_next(&cluster->index) + 1) *
     453                 :            :                SWAPFILE_CLUSTER) {
     454         [ #  # ]:          0 :                 if (!si->swap_map[tmp]) {
     455                 :            :                         found_free = true;
     456                 :            :                         break;
     457                 :            :                 }
     458                 :          0 :                 tmp++;
     459                 :            :         }
     460         [ #  # ]:          0 :         if (!found_free) {
     461                 :            :                 cluster_set_null(&cluster->index);
     462                 :            :                 goto new_cluster;
     463                 :            :         }
     464                 :          0 :         cluster->next = tmp + 1;
     465                 :          0 :         *offset = tmp;
     466                 :          0 :         *scan_base = tmp;
     467                 :            : }
     468                 :            : 
     469                 :          0 : static unsigned long scan_swap_map(struct swap_info_struct *si,
     470                 :            :                                    unsigned char usage)
     471                 :            : {
     472                 :            :         unsigned long offset;
     473                 :            :         unsigned long scan_base;
     474                 :            :         unsigned long last_in_cluster = 0;
     475                 :            :         int latency_ration = LATENCY_LIMIT;
     476                 :            : 
     477                 :            :         /*
     478                 :            :          * We try to cluster swap pages by allocating them sequentially
     479                 :            :          * in swap.  Once we've allocated SWAPFILE_CLUSTER pages this
     480                 :            :          * way, however, we resort to first-free allocation, starting
     481                 :            :          * a new cluster.  This prevents us from scattering swap pages
     482                 :            :          * all over the entire swap partition, so that we reduce
     483                 :            :          * overall disk seek times between swap pages.  -- sct
     484                 :            :          * But we do now try to find an empty cluster.  -Andrea
     485                 :            :          * And we let swap pages go all over an SSD partition.  Hugh
     486                 :            :          */
     487                 :            : 
     488                 :          0 :         si->flags += SWP_SCANNING;
     489                 :          0 :         scan_base = offset = si->cluster_next;
     490                 :            : 
     491                 :            :         /* SSD algorithm */
     492         [ #  # ]:          0 :         if (si->cluster_info) {
     493                 :          0 :                 scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
     494                 :          0 :                 goto checks;
     495                 :            :         }
     496                 :            : 
     497         [ #  # ]:          0 :         if (unlikely(!si->cluster_nr--)) {
     498         [ #  # ]:          0 :                 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
     499                 :          0 :                         si->cluster_nr = SWAPFILE_CLUSTER - 1;
     500                 :          0 :                         goto checks;
     501                 :            :                 }
     502                 :            : 
     503                 :            :                 spin_unlock(&si->lock);
     504                 :            : 
     505                 :            :                 /*
     506                 :            :                  * If seek is expensive, start searching for new cluster from
     507                 :            :                  * start of partition, to minimize the span of allocated swap.
     508                 :            :                  * But if seek is cheap, search from our current position, so
     509                 :            :                  * that swap is allocated from all over the partition: if the
     510                 :            :                  * Flash Translation Layer only remaps within limited zones,
     511                 :            :                  * we don't want to wear out the first zone too quickly.
     512                 :            :                  */
     513         [ #  # ]:          0 :                 if (!(si->flags & SWP_SOLIDSTATE))
     514                 :          0 :                         scan_base = offset = si->lowest_bit;
     515                 :          0 :                 last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
     516                 :            : 
     517                 :            :                 /* Locate the first empty (unaligned) cluster */
     518         [ #  # ]:          0 :                 for (; last_in_cluster <= si->highest_bit; offset++) {
     519         [ #  # ]:          0 :                         if (si->swap_map[offset])
     520                 :          0 :                                 last_in_cluster = offset + SWAPFILE_CLUSTER;
     521         [ #  # ]:          0 :                         else if (offset == last_in_cluster) {
     522                 :            :                                 spin_lock(&si->lock);
     523                 :          0 :                                 offset -= SWAPFILE_CLUSTER - 1;
     524                 :          0 :                                 si->cluster_next = offset;
     525                 :          0 :                                 si->cluster_nr = SWAPFILE_CLUSTER - 1;
     526                 :          0 :                                 goto checks;
     527                 :            :                         }
     528         [ #  # ]:          0 :                         if (unlikely(--latency_ration < 0)) {
     529                 :          0 :                                 cond_resched();
     530                 :            :                                 latency_ration = LATENCY_LIMIT;
     531                 :            :                         }
     532                 :            :                 }
     533                 :            : 
     534                 :          0 :                 offset = si->lowest_bit;
     535                 :          0 :                 last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
     536                 :            : 
     537                 :            :                 /* Locate the first empty (unaligned) cluster */
     538         [ #  # ]:          0 :                 for (; last_in_cluster < scan_base; offset++) {
     539         [ #  # ]:          0 :                         if (si->swap_map[offset])
     540                 :          0 :                                 last_in_cluster = offset + SWAPFILE_CLUSTER;
     541         [ #  # ]:          0 :                         else if (offset == last_in_cluster) {
     542                 :            :                                 spin_lock(&si->lock);
     543                 :          0 :                                 offset -= SWAPFILE_CLUSTER - 1;
     544                 :          0 :                                 si->cluster_next = offset;
     545                 :          0 :                                 si->cluster_nr = SWAPFILE_CLUSTER - 1;
     546                 :          0 :                                 goto checks;
     547                 :            :                         }
     548         [ #  # ]:          0 :                         if (unlikely(--latency_ration < 0)) {
     549                 :          0 :                                 cond_resched();
     550                 :            :                                 latency_ration = LATENCY_LIMIT;
     551                 :            :                         }
     552                 :            :                 }
     553                 :            : 
     554                 :          0 :                 offset = scan_base;
     555                 :            :                 spin_lock(&si->lock);
     556                 :          0 :                 si->cluster_nr = SWAPFILE_CLUSTER - 1;
     557                 :            :         }
     558                 :            : 
     559                 :            : checks:
     560         [ #  # ]:          0 :         if (si->cluster_info) {
     561         [ #  # ]:          0 :                 while (scan_swap_map_ssd_cluster_conflict(si, offset))
     562                 :          0 :                         scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
     563                 :            :         }
     564         [ #  # ]:          0 :         if (!(si->flags & SWP_WRITEOK))
     565                 :            :                 goto no_page;
     566         [ #  # ]:          0 :         if (!si->highest_bit)
     567                 :            :                 goto no_page;
     568         [ #  # ]:          0 :         if (offset > si->highest_bit)
     569                 :          0 :                 scan_base = offset = si->lowest_bit;
     570                 :            : 
     571                 :            :         /* reuse swap entry of cache-only swap if not busy. */
     572 [ #  # ][ #  # ]:          0 :         if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
     573                 :            :                 int swap_was_freed;
     574                 :            :                 spin_unlock(&si->lock);
     575                 :          0 :                 swap_was_freed = __try_to_reclaim_swap(si, offset);
     576                 :            :                 spin_lock(&si->lock);
     577                 :            :                 /* entry was freed successfully, try to use this again */
     578         [ #  # ]:          0 :                 if (swap_was_freed)
     579                 :            :                         goto checks;
     580                 :            :                 goto scan; /* check next one */
     581                 :            :         }
     582                 :            : 
     583         [ #  # ]:          0 :         if (si->swap_map[offset])
     584                 :            :                 goto scan;
     585                 :            : 
     586         [ #  # ]:          0 :         if (offset == si->lowest_bit)
     587                 :          0 :                 si->lowest_bit++;
     588         [ #  # ]:          0 :         if (offset == si->highest_bit)
     589                 :          0 :                 si->highest_bit--;
     590                 :          0 :         si->inuse_pages++;
     591         [ #  # ]:          0 :         if (si->inuse_pages == si->pages) {
     592                 :          0 :                 si->lowest_bit = si->max;
     593                 :          0 :                 si->highest_bit = 0;
     594                 :            :         }
     595                 :          0 :         si->swap_map[offset] = usage;
     596                 :          0 :         inc_cluster_info_page(si, si->cluster_info, offset);
     597                 :          0 :         si->cluster_next = offset + 1;
     598                 :          0 :         si->flags -= SWP_SCANNING;
     599                 :            : 
     600                 :          0 :         return offset;
     601                 :            : 
     602                 :            : scan:
     603                 :            :         spin_unlock(&si->lock);
     604         [ #  # ]:          0 :         while (++offset <= si->highest_bit) {
     605         [ #  # ]:          0 :                 if (!si->swap_map[offset]) {
     606                 :            :                         spin_lock(&si->lock);
     607                 :            :                         goto checks;
     608                 :            :                 }
     609 [ #  # ][ #  # ]:          0 :                 if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
     610                 :            :                         spin_lock(&si->lock);
     611                 :            :                         goto checks;
     612                 :            :                 }
     613         [ #  # ]:          0 :                 if (unlikely(--latency_ration < 0)) {
     614                 :          0 :                         cond_resched();
     615                 :            :                         latency_ration = LATENCY_LIMIT;
     616                 :            :                 }
     617                 :            :         }
     618                 :          0 :         offset = si->lowest_bit;
     619         [ #  # ]:          0 :         while (++offset < scan_base) {
     620         [ #  # ]:          0 :                 if (!si->swap_map[offset]) {
     621                 :            :                         spin_lock(&si->lock);
     622                 :            :                         goto checks;
     623                 :            :                 }
     624 [ #  # ][ #  # ]:          0 :                 if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
     625                 :            :                         spin_lock(&si->lock);
     626                 :            :                         goto checks;
     627                 :            :                 }
     628         [ #  # ]:          0 :                 if (unlikely(--latency_ration < 0)) {
     629                 :          0 :                         cond_resched();
     630                 :            :                         latency_ration = LATENCY_LIMIT;
     631                 :            :                 }
     632                 :            :         }
     633                 :            :         spin_lock(&si->lock);
     634                 :            : 
     635                 :            : no_page:
     636                 :          0 :         si->flags -= SWP_SCANNING;
     637                 :          0 :         return 0;
     638                 :            : }
     639                 :            : 
     640                 :          0 : swp_entry_t get_swap_page(void)
     641                 :            : {
     642                 :            :         struct swap_info_struct *si;
     643                 :            :         pgoff_t offset;
     644                 :            :         int type, next;
     645                 :            :         int wrapped = 0;
     646                 :            :         int hp_index;
     647                 :            : 
     648                 :            :         spin_lock(&swap_lock);
     649         [ #  # ]:          0 :         if (atomic_long_read(&nr_swap_pages) <= 0)
     650                 :            :                 goto noswap;
     651                 :            :         atomic_long_dec(&nr_swap_pages);
     652                 :            : 
     653         [ #  # ]:          0 :         for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
     654                 :          0 :                 hp_index = atomic_xchg(&highest_priority_index, -1);
     655                 :            :                 /*
     656                 :            :                  * highest_priority_index records current highest priority swap
     657                 :            :                  * type which just frees swap entries. If its priority is
     658                 :            :                  * higher than that of swap_list.next swap type, we use it.  It
     659                 :            :                  * isn't protected by swap_lock, so it can be an invalid value
     660                 :            :                  * if the corresponding swap type is swapoff. We double check
     661                 :            :                  * the flags here. It's even possible the swap type is swapoff
     662                 :            :                  * and swapon again and its priority is changed. In such rare
     663                 :            :                  * case, low prority swap type might be used, but eventually
     664                 :            :                  * high priority swap will be used after several rounds of
     665                 :            :                  * swap.
     666                 :            :                  */
     667 [ #  # ][ #  # ]:          0 :                 if (hp_index != -1 && hp_index != type &&
     668         [ #  # ]:          0 :                     swap_info[type]->prio < swap_info[hp_index]->prio &&
     669                 :          0 :                     (swap_info[hp_index]->flags & SWP_WRITEOK)) {
     670                 :            :                         type = hp_index;
     671                 :          0 :                         swap_list.next = type;
     672                 :            :                 }
     673                 :            : 
     674                 :          0 :                 si = swap_info[type];
     675                 :          0 :                 next = si->next;
     676 [ #  # ][ #  # ]:          0 :                 if (next < 0 ||
     677         [ #  # ]:          0 :                     (!wrapped && si->prio != swap_info[next]->prio)) {
     678                 :          0 :                         next = swap_list.head;
     679                 :          0 :                         wrapped++;
     680                 :            :                 }
     681                 :            : 
     682                 :            :                 spin_lock(&si->lock);
     683         [ #  # ]:          0 :                 if (!si->highest_bit) {
     684                 :            :                         spin_unlock(&si->lock);
     685                 :          0 :                         continue;
     686                 :            :                 }
     687         [ #  # ]:          0 :                 if (!(si->flags & SWP_WRITEOK)) {
     688                 :            :                         spin_unlock(&si->lock);
     689                 :          0 :                         continue;
     690                 :            :                 }
     691                 :            : 
     692                 :          0 :                 swap_list.next = next;
     693                 :            : 
     694                 :            :                 spin_unlock(&swap_lock);
     695                 :            :                 /* This is called for allocating swap entry for cache */
     696                 :          0 :                 offset = scan_swap_map(si, SWAP_HAS_CACHE);
     697                 :            :                 spin_unlock(&si->lock);
     698         [ #  # ]:          0 :                 if (offset)
     699                 :          0 :                         return swp_entry(type, offset);
     700                 :            :                 spin_lock(&swap_lock);
     701                 :          0 :                 next = swap_list.next;
     702                 :            :         }
     703                 :            : 
     704                 :            :         atomic_long_inc(&nr_swap_pages);
     705                 :            : noswap:
     706                 :            :         spin_unlock(&swap_lock);
     707                 :          0 :         return (swp_entry_t) {0};
     708                 :            : }
     709                 :            : 
     710                 :            : /* The only caller of this function is now suspend routine */
     711                 :          0 : swp_entry_t get_swap_page_of_type(int type)
     712                 :            : {
     713                 :            :         struct swap_info_struct *si;
     714                 :            :         pgoff_t offset;
     715                 :            : 
     716                 :          0 :         si = swap_info[type];
     717                 :            :         spin_lock(&si->lock);
     718 [ #  # ][ #  # ]:          0 :         if (si && (si->flags & SWP_WRITEOK)) {
     719                 :            :                 atomic_long_dec(&nr_swap_pages);
     720                 :            :                 /* This is called for allocating swap entry, not cache */
     721                 :          0 :                 offset = scan_swap_map(si, 1);
     722         [ #  # ]:          0 :                 if (offset) {
     723                 :            :                         spin_unlock(&si->lock);
     724                 :          0 :                         return swp_entry(type, offset);
     725                 :            :                 }
     726                 :            :                 atomic_long_inc(&nr_swap_pages);
     727                 :            :         }
     728                 :            :         spin_unlock(&si->lock);
     729                 :          0 :         return (swp_entry_t) {0};
     730                 :            : }
     731                 :            : 
     732                 :          0 : static struct swap_info_struct *swap_info_get(swp_entry_t entry)
     733                 :            : {
     734                 :            :         struct swap_info_struct *p;
     735                 :            :         unsigned long offset, type;
     736                 :            : 
     737         [ #  # ]:          0 :         if (!entry.val)
     738                 :            :                 goto out;
     739                 :            :         type = swp_type(entry);
     740         [ #  # ]:          0 :         if (type >= nr_swapfiles)
     741                 :            :                 goto bad_nofile;
     742                 :          0 :         p = swap_info[type];
     743         [ #  # ]:          0 :         if (!(p->flags & SWP_USED))
     744                 :            :                 goto bad_device;
     745                 :            :         offset = swp_offset(entry);
     746         [ #  # ]:          0 :         if (offset >= p->max)
     747                 :            :                 goto bad_offset;
     748         [ #  # ]:          0 :         if (!p->swap_map[offset])
     749                 :            :                 goto bad_free;
     750                 :            :         spin_lock(&p->lock);
     751                 :          0 :         return p;
     752                 :            : 
     753                 :            : bad_free:
     754                 :          0 :         pr_err("swap_free: %s%08lx\n", Unused_offset, entry.val);
     755                 :          0 :         goto out;
     756                 :            : bad_offset:
     757                 :          0 :         pr_err("swap_free: %s%08lx\n", Bad_offset, entry.val);
     758                 :          0 :         goto out;
     759                 :            : bad_device:
     760                 :          0 :         pr_err("swap_free: %s%08lx\n", Unused_file, entry.val);
     761                 :          0 :         goto out;
     762                 :            : bad_nofile:
     763                 :          0 :         pr_err("swap_free: %s%08lx\n", Bad_file, entry.val);
     764                 :            : out:
     765                 :            :         return NULL;
     766                 :            : }
     767                 :            : 
     768                 :            : /*
     769                 :            :  * This swap type frees swap entry, check if it is the highest priority swap
     770                 :            :  * type which just frees swap entry. get_swap_page() uses
     771                 :            :  * highest_priority_index to search highest priority swap type. The
     772                 :            :  * swap_info_struct.lock can't protect us if there are multiple swap types
     773                 :            :  * active, so we use atomic_cmpxchg.
     774                 :            :  */
     775                 :          0 : static void set_highest_priority_index(int type)
     776                 :            : {
     777                 :            :         int old_hp_index, new_hp_index;
     778                 :            : 
     779                 :            :         do {
     780                 :          0 :                 old_hp_index = atomic_read(&highest_priority_index);
     781 [ #  # ][ #  # ]:          0 :                 if (old_hp_index != -1 &&
     782                 :          0 :                         swap_info[old_hp_index]->prio >= swap_info[type]->prio)
     783                 :            :                         break;
     784                 :            :                 new_hp_index = type;
     785                 :            :         } while (atomic_cmpxchg(&highest_priority_index,
     786         [ #  # ]:          0 :                 old_hp_index, new_hp_index) != old_hp_index);
     787                 :          0 : }
     788                 :            : 
     789                 :          0 : static unsigned char swap_entry_free(struct swap_info_struct *p,
     790                 :            :                                      swp_entry_t entry, unsigned char usage)
     791                 :            : {
     792                 :            :         unsigned long offset = swp_offset(entry);
     793                 :            :         unsigned char count;
     794                 :            :         unsigned char has_cache;
     795                 :            : 
     796                 :          0 :         count = p->swap_map[offset];
     797                 :          0 :         has_cache = count & SWAP_HAS_CACHE;
     798                 :          0 :         count &= ~SWAP_HAS_CACHE;
     799                 :            : 
     800         [ #  # ]:          0 :         if (usage == SWAP_HAS_CACHE) {
     801                 :            :                 VM_BUG_ON(!has_cache);
     802                 :            :                 has_cache = 0;
     803         [ #  # ]:          0 :         } else if (count == SWAP_MAP_SHMEM) {
     804                 :            :                 /*
     805                 :            :                  * Or we could insist on shmem.c using a special
     806                 :            :                  * swap_shmem_free() and free_shmem_swap_and_cache()...
     807                 :            :                  */
     808                 :            :                 count = 0;
     809         [ #  # ]:          0 :         } else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {
     810         [ #  # ]:          0 :                 if (count == COUNT_CONTINUED) {
     811         [ #  # ]:          0 :                         if (swap_count_continued(p, offset, count))
     812                 :            :                                 count = SWAP_MAP_MAX | COUNT_CONTINUED;
     813                 :            :                         else
     814                 :            :                                 count = SWAP_MAP_MAX;
     815                 :            :                 } else
     816                 :          0 :                         count--;
     817                 :            :         }
     818                 :            : 
     819                 :            :         if (!count)
     820                 :            :                 mem_cgroup_uncharge_swap(entry);
     821                 :            : 
     822                 :          0 :         usage = count | has_cache;
     823                 :          0 :         p->swap_map[offset] = usage;
     824                 :            : 
     825                 :            :         /* free if no reference */
     826         [ #  # ]:          0 :         if (!usage) {
     827                 :          0 :                 dec_cluster_info_page(p, p->cluster_info, offset);
     828         [ #  # ]:          0 :                 if (offset < p->lowest_bit)
     829                 :          0 :                         p->lowest_bit = offset;
     830         [ #  # ]:          0 :                 if (offset > p->highest_bit)
     831                 :          0 :                         p->highest_bit = offset;
     832                 :          0 :                 set_highest_priority_index(p->type);
     833                 :            :                 atomic_long_inc(&nr_swap_pages);
     834                 :          0 :                 p->inuse_pages--;
     835                 :            :                 frontswap_invalidate_page(p->type, offset);
     836         [ #  # ]:          0 :                 if (p->flags & SWP_BLKDEV) {
     837                 :          0 :                         struct gendisk *disk = p->bdev->bd_disk;
     838         [ #  # ]:          0 :                         if (disk->fops->swap_slot_free_notify)
     839                 :          0 :                                 disk->fops->swap_slot_free_notify(p->bdev,
     840                 :            :                                                                   offset);
     841                 :            :                 }
     842                 :            :         }
     843                 :            : 
     844                 :          0 :         return usage;
     845                 :            : }
     846                 :            : 
     847                 :            : /*
     848                 :            :  * Caller has made sure that the swap device corresponding to entry
     849                 :            :  * is still around or has not been recycled.
     850                 :            :  */
     851                 :          0 : void swap_free(swp_entry_t entry)
     852                 :            : {
     853                 :            :         struct swap_info_struct *p;
     854                 :            : 
     855                 :          0 :         p = swap_info_get(entry);
     856         [ #  # ]:          0 :         if (p) {
     857                 :          0 :                 swap_entry_free(p, entry, 1);
     858                 :            :                 spin_unlock(&p->lock);
     859                 :            :         }
     860                 :          0 : }
     861                 :            : 
     862                 :            : /*
     863                 :            :  * Called after dropping swapcache to decrease refcnt to swap entries.
     864                 :            :  */
     865                 :          0 : void swapcache_free(swp_entry_t entry, struct page *page)
     866                 :            : {
     867                 :            :         struct swap_info_struct *p;
     868                 :            :         unsigned char count;
     869                 :            : 
     870                 :          0 :         p = swap_info_get(entry);
     871         [ #  # ]:          0 :         if (p) {
     872                 :          0 :                 count = swap_entry_free(p, entry, SWAP_HAS_CACHE);
     873                 :            :                 if (page)
     874                 :            :                         mem_cgroup_uncharge_swapcache(page, entry, count != 0);
     875                 :            :                 spin_unlock(&p->lock);
     876                 :            :         }
     877                 :          0 : }
     878                 :            : 
     879                 :            : /*
     880                 :            :  * How many references to page are currently swapped out?
     881                 :            :  * This does not give an exact answer when swap count is continued,
     882                 :            :  * but does include the high COUNT_CONTINUED flag to allow for that.
     883                 :            :  */
     884                 :          0 : int page_swapcount(struct page *page)
     885                 :            : {
     886                 :            :         int count = 0;
     887                 :            :         struct swap_info_struct *p;
     888                 :            :         swp_entry_t entry;
     889                 :            : 
     890                 :          0 :         entry.val = page_private(page);
     891                 :          0 :         p = swap_info_get(entry);
     892         [ #  # ]:          0 :         if (p) {
     893                 :          0 :                 count = swap_count(p->swap_map[swp_offset(entry)]);
     894                 :            :                 spin_unlock(&p->lock);
     895                 :            :         }
     896                 :          0 :         return count;
     897                 :            : }
     898                 :            : 
     899                 :            : /*
     900                 :            :  * We can write to an anon page without COW if there are no other references
     901                 :            :  * to it.  And as a side-effect, free up its swap: because the old content
     902                 :            :  * on disk will never be read, and seeking back there to write new content
     903                 :            :  * later would only waste time away from clustering.
     904                 :            :  */
     905                 :          0 : int reuse_swap_page(struct page *page)
     906                 :            : {
     907                 :            :         int count;
     908                 :            : 
     909                 :            :         VM_BUG_ON(!PageLocked(page));
     910                 :            :         if (unlikely(PageKsm(page)))
     911                 :            :                 return 0;
     912                 :            :         count = page_mapcount(page);
     913 [ +  + ][ -  + ]:   17563594 :         if (count <= 1 && PageSwapCache(page)) {
     914                 :          0 :                 count += page_swapcount(page);
     915 [ #  # ][ #  # ]:          0 :                 if (count == 1 && !PageWriteback(page)) {
     916                 :          0 :                         delete_from_swap_cache(page);
     917                 :            :                         SetPageDirty(page);
     918                 :            :                 }
     919                 :            :         }
     920                 :          0 :         return count <= 1;
     921                 :            : }
     922                 :            : 
     923                 :            : /*
     924                 :            :  * If swap is getting full, or if there are no more mappings of this page,
     925                 :            :  * then try_to_free_swap is called to free its swap space.
     926                 :            :  */
     927                 :          0 : int try_to_free_swap(struct page *page)
     928                 :            : {
     929                 :            :         VM_BUG_ON(!PageLocked(page));
     930                 :            : 
     931         [ #  # ]:          0 :         if (!PageSwapCache(page))
     932                 :            :                 return 0;
     933         [ #  # ]:          0 :         if (PageWriteback(page))
     934                 :            :                 return 0;
     935         [ #  # ]:          0 :         if (page_swapcount(page))
     936                 :            :                 return 0;
     937                 :            : 
     938                 :            :         /*
     939                 :            :          * Once hibernation has begun to create its image of memory,
     940                 :            :          * there's a danger that one of the calls to try_to_free_swap()
     941                 :            :          * - most probably a call from __try_to_reclaim_swap() while
     942                 :            :          * hibernation is allocating its own swap pages for the image,
     943                 :            :          * but conceivably even a call from memory reclaim - will free
     944                 :            :          * the swap from a page which has already been recorded in the
     945                 :            :          * image as a clean swapcache page, and then reuse its swap for
     946                 :            :          * another page of the image.  On waking from hibernation, the
     947                 :            :          * original page might be freed under memory pressure, then
     948                 :            :          * later read back in from swap, now with the wrong data.
     949                 :            :          *
     950                 :            :          * Hibernation suspends storage while it is writing the image
     951                 :            :          * to disk so check that here.
     952                 :            :          */
     953         [ #  # ]:          0 :         if (pm_suspended_storage())
     954                 :            :                 return 0;
     955                 :            : 
     956                 :          0 :         delete_from_swap_cache(page);
     957                 :            :         SetPageDirty(page);
     958                 :          0 :         return 1;
     959                 :            : }
     960                 :            : 
     961                 :            : /*
     962                 :            :  * Free the swap entry like above, but also try to
     963                 :            :  * free the page cache entry if it is the last user.
     964                 :            :  */
     965                 :          0 : int free_swap_and_cache(swp_entry_t entry)
     966                 :            : {
     967                 :            :         struct swap_info_struct *p;
     968                 :            :         struct page *page = NULL;
     969                 :            : 
     970         [ #  # ]:          0 :         if (non_swap_entry(entry))
     971                 :            :                 return 1;
     972                 :            : 
     973                 :          0 :         p = swap_info_get(entry);
     974         [ #  # ]:          0 :         if (p) {
     975         [ #  # ]:          0 :                 if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {
     976                 :          0 :                         page = find_get_page(swap_address_space(entry),
     977                 :            :                                                 entry.val);
     978   [ #  #  #  # ]:          0 :                         if (page && !trylock_page(page)) {
     979                 :          0 :                                 page_cache_release(page);
     980                 :            :                                 page = NULL;
     981                 :            :                         }
     982                 :            :                 }
     983                 :            :                 spin_unlock(&p->lock);
     984                 :            :         }
     985         [ #  # ]:          0 :         if (page) {
     986                 :            :                 /*
     987                 :            :                  * Not mapped elsewhere, or swap space full? Free it!
     988                 :            :                  * Also recheck PageSwapCache now page is locked (above).
     989                 :            :                  */
     990 [ #  # ][ #  # ]:          0 :                 if (PageSwapCache(page) && !PageWriteback(page) &&
                 [ #  # ]
     991         [ #  # ]:          0 :                                 (!page_mapped(page) || vm_swap_full())) {
     992                 :          0 :                         delete_from_swap_cache(page);
     993                 :            :                         SetPageDirty(page);
     994                 :            :                 }
     995                 :          0 :                 unlock_page(page);
     996                 :          0 :                 page_cache_release(page);
     997                 :            :         }
     998                 :          0 :         return p != NULL;
     999                 :            : }
    1000                 :            : 
    1001                 :            : #ifdef CONFIG_HIBERNATION
    1002                 :            : /*
    1003                 :            :  * Find the swap type that corresponds to given device (if any).
    1004                 :            :  *
    1005                 :            :  * @offset - number of the PAGE_SIZE-sized block of the device, starting
    1006                 :            :  * from 0, in which the swap header is expected to be located.
    1007                 :            :  *
    1008                 :            :  * This is needed for the suspend to disk (aka swsusp).
    1009                 :            :  */
    1010                 :            : int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
    1011                 :            : {
    1012                 :            :         struct block_device *bdev = NULL;
    1013                 :            :         int type;
    1014                 :            : 
    1015                 :            :         if (device)
    1016                 :            :                 bdev = bdget(device);
    1017                 :            : 
    1018                 :            :         spin_lock(&swap_lock);
    1019                 :            :         for (type = 0; type < nr_swapfiles; type++) {
    1020                 :            :                 struct swap_info_struct *sis = swap_info[type];
    1021                 :            : 
    1022                 :            :                 if (!(sis->flags & SWP_WRITEOK))
    1023                 :            :                         continue;
    1024                 :            : 
    1025                 :            :                 if (!bdev) {
    1026                 :            :                         if (bdev_p)
    1027                 :            :                                 *bdev_p = bdgrab(sis->bdev);
    1028                 :            : 
    1029                 :            :                         spin_unlock(&swap_lock);
    1030                 :            :                         return type;
    1031                 :            :                 }
    1032                 :            :                 if (bdev == sis->bdev) {
    1033                 :            :                         struct swap_extent *se = &sis->first_swap_extent;
    1034                 :            : 
    1035                 :            :                         if (se->start_block == offset) {
    1036                 :            :                                 if (bdev_p)
    1037                 :            :                                         *bdev_p = bdgrab(sis->bdev);
    1038                 :            : 
    1039                 :            :                                 spin_unlock(&swap_lock);
    1040                 :            :                                 bdput(bdev);
    1041                 :            :                                 return type;
    1042                 :            :                         }
    1043                 :            :                 }
    1044                 :            :         }
    1045                 :            :         spin_unlock(&swap_lock);
    1046                 :            :         if (bdev)
    1047                 :            :                 bdput(bdev);
    1048                 :            : 
    1049                 :            :         return -ENODEV;
    1050                 :            : }
    1051                 :            : 
    1052                 :            : /*
    1053                 :            :  * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev
    1054                 :            :  * corresponding to given index in swap_info (swap type).
    1055                 :            :  */
    1056                 :            : sector_t swapdev_block(int type, pgoff_t offset)
    1057                 :            : {
    1058                 :            :         struct block_device *bdev;
    1059                 :            : 
    1060                 :            :         if ((unsigned int)type >= nr_swapfiles)
    1061                 :            :                 return 0;
    1062                 :            :         if (!(swap_info[type]->flags & SWP_WRITEOK))
    1063                 :            :                 return 0;
    1064                 :            :         return map_swap_entry(swp_entry(type, offset), &bdev);
    1065                 :            : }
    1066                 :            : 
    1067                 :            : /*
    1068                 :            :  * Return either the total number of swap pages of given type, or the number
    1069                 :            :  * of free pages of that type (depending on @free)
    1070                 :            :  *
    1071                 :            :  * This is needed for software suspend
    1072                 :            :  */
    1073                 :            : unsigned int count_swap_pages(int type, int free)
    1074                 :            : {
    1075                 :            :         unsigned int n = 0;
    1076                 :            : 
    1077                 :            :         spin_lock(&swap_lock);
    1078                 :            :         if ((unsigned int)type < nr_swapfiles) {
    1079                 :            :                 struct swap_info_struct *sis = swap_info[type];
    1080                 :            : 
    1081                 :            :                 spin_lock(&sis->lock);
    1082                 :            :                 if (sis->flags & SWP_WRITEOK) {
    1083                 :            :                         n = sis->pages;
    1084                 :            :                         if (free)
    1085                 :            :                                 n -= sis->inuse_pages;
    1086                 :            :                 }
    1087                 :            :                 spin_unlock(&sis->lock);
    1088                 :            :         }
    1089                 :            :         spin_unlock(&swap_lock);
    1090                 :            :         return n;
    1091                 :            : }
    1092                 :            : #endif /* CONFIG_HIBERNATION */
    1093                 :            : 
    1094                 :            : static inline int maybe_same_pte(pte_t pte, pte_t swp_pte)
    1095                 :            : {
    1096                 :            : #ifdef CONFIG_MEM_SOFT_DIRTY
    1097                 :            :         /*
    1098                 :            :          * When pte keeps soft dirty bit the pte generated
    1099                 :            :          * from swap entry does not has it, still it's same
    1100                 :            :          * pte from logical point of view.
    1101                 :            :          */
    1102                 :            :         pte_t swp_pte_dirty = pte_swp_mksoft_dirty(swp_pte);
    1103                 :            :         return pte_same(pte, swp_pte) || pte_same(pte, swp_pte_dirty);
    1104                 :            : #else
    1105                 :            :         return pte_same(pte, swp_pte);
    1106                 :            : #endif
    1107                 :            : }
    1108                 :            : 
    1109                 :            : /*
    1110                 :            :  * No need to decide whether this PTE shares the swap entry with others,
    1111                 :            :  * just let do_wp_page work it out if a write is requested later - to
    1112                 :            :  * force COW, vm_page_prot omits write permission from any private vma.
    1113                 :            :  */
    1114                 :          0 : static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
    1115                 :            :                 unsigned long addr, swp_entry_t entry, struct page *page)
    1116                 :            : {
    1117                 :            :         struct page *swapcache;
    1118                 :            :         struct mem_cgroup *memcg;
    1119                 :            :         spinlock_t *ptl;
    1120                 :            :         pte_t *pte;
    1121                 :            :         int ret = 1;
    1122                 :            : 
    1123                 :            :         swapcache = page;
    1124                 :            :         page = ksm_might_need_to_copy(page, vma, addr);
    1125         [ #  # ]:          0 :         if (unlikely(!page))
    1126                 :            :                 return -ENOMEM;
    1127                 :            : 
    1128                 :            :         if (mem_cgroup_try_charge_swapin(vma->vm_mm, page,
    1129                 :            :                                          GFP_KERNEL, &memcg)) {
    1130                 :            :                 ret = -ENOMEM;
    1131                 :            :                 goto out_nolock;
    1132                 :            :         }
    1133                 :            : 
    1134                 :          0 :         pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
    1135         [ #  # ]:          0 :         if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) {
    1136                 :            :                 mem_cgroup_cancel_charge_swapin(memcg);
    1137                 :            :                 ret = 0;
    1138                 :            :                 goto out;
    1139                 :            :         }
    1140                 :            : 
    1141                 :          0 :         dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
    1142                 :          0 :         inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
    1143                 :            :         get_page(page);
    1144                 :          0 :         set_pte_at(vma->vm_mm, addr, pte,
    1145                 :          0 :                    pte_mkold(mk_pte(page, vma->vm_page_prot)));
    1146                 :            :         if (page == swapcache)
    1147                 :          0 :                 page_add_anon_rmap(page, vma, addr);
    1148                 :            :         else /* ksm created a completely new copy */
    1149                 :            :                 page_add_new_anon_rmap(page, vma, addr);
    1150                 :            :         mem_cgroup_commit_charge_swapin(page, memcg);
    1151                 :          0 :         swap_free(entry);
    1152                 :            :         /*
    1153                 :            :          * Move the page to the active list so it is not
    1154                 :            :          * immediately swapped out again after swapon.
    1155                 :            :          */
    1156                 :          0 :         activate_page(page);
    1157                 :            : out:
    1158                 :          0 :         pte_unmap_unlock(pte, ptl);
    1159                 :            : out_nolock:
    1160                 :            :         if (page != swapcache) {
    1161                 :            :                 unlock_page(page);
    1162                 :            :                 put_page(page);
    1163                 :            :         }
    1164                 :          0 :         return ret;
    1165                 :            : }
    1166                 :            : 
    1167                 :          0 : static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
    1168                 :            :                                 unsigned long addr, unsigned long end,
    1169                 :            :                                 swp_entry_t entry, struct page *page)
    1170                 :            : {
    1171                 :            :         pte_t swp_pte = swp_entry_to_pte(entry);
    1172                 :            :         pte_t *pte;
    1173                 :            :         int ret = 0;
    1174                 :            : 
    1175                 :            :         /*
    1176                 :            :          * We don't actually need pte lock while scanning for swp_pte: since
    1177                 :            :          * we hold page lock and mmap_sem, swp_pte cannot be inserted into the
    1178                 :            :          * page table while we're scanning; though it could get zapped, and on
    1179                 :            :          * some architectures (e.g. x86_32 with PAE) we might catch a glimpse
    1180                 :            :          * of unmatched parts which look like swp_pte, so unuse_pte must
    1181                 :            :          * recheck under pte lock.  Scanning without pte lock lets it be
    1182                 :            :          * preemptable whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE.
    1183                 :            :          */
    1184                 :          0 :         pte = pte_offset_map(pmd, addr);
    1185                 :            :         do {
    1186                 :            :                 /*
    1187                 :            :                  * swapoff spends a _lot_ of time in this loop!
    1188                 :            :                  * Test inline before going to call unuse_pte.
    1189                 :            :                  */
    1190         [ #  # ]:          0 :                 if (unlikely(maybe_same_pte(*pte, swp_pte))) {
    1191                 :          0 :                         pte_unmap(pte);
    1192                 :          0 :                         ret = unuse_pte(vma, pmd, addr, entry, page);
    1193         [ #  # ]:          0 :                         if (ret)
    1194                 :            :                                 goto out;
    1195                 :          0 :                         pte = pte_offset_map(pmd, addr);
    1196                 :            :                 }
    1197         [ #  # ]:          0 :         } while (pte++, addr += PAGE_SIZE, addr != end);
    1198                 :          0 :         pte_unmap(pte - 1);
    1199                 :            : out:
    1200                 :          0 :         return ret;
    1201                 :            : }
    1202                 :            : 
    1203                 :            : static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
    1204                 :            :                                 unsigned long addr, unsigned long end,
    1205                 :            :                                 swp_entry_t entry, struct page *page)
    1206                 :            : {
    1207                 :            :         pmd_t *pmd;
    1208                 :            :         unsigned long next;
    1209                 :            :         int ret;
    1210                 :            : 
    1211                 :            :         pmd = pmd_offset(pud, addr);
    1212                 :            :         do {
    1213                 :            :                 next = pmd_addr_end(addr, end);
    1214         [ #  # ]:          0 :                 if (pmd_none_or_trans_huge_or_clear_bad(pmd))
    1215                 :          0 :                         continue;
    1216                 :          0 :                 ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
    1217         [ #  # ]:          0 :                 if (ret)
    1218                 :            :                         return ret;
    1219                 :            :         } while (pmd++, addr = next, addr != end);
    1220                 :            :         return 0;
    1221                 :            : }
    1222                 :            : 
    1223                 :            : static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
    1224                 :            :                                 unsigned long addr, unsigned long end,
    1225                 :            :                                 swp_entry_t entry, struct page *page)
    1226                 :            : {
    1227                 :            :         pud_t *pud;
    1228                 :            :         unsigned long next;
    1229                 :            :         int ret;
    1230                 :            : 
    1231                 :            :         pud = pud_offset(pgd, addr);
    1232                 :            :         do {
    1233                 :            :                 next = pud_addr_end(addr, end);
    1234                 :            :                 if (pud_none_or_clear_bad(pud))
    1235                 :            :                         continue;
    1236                 :            :                 ret = unuse_pmd_range(vma, pud, addr, next, entry, page);
    1237         [ #  # ]:          0 :                 if (ret)
    1238                 :            :                         return ret;
    1239                 :            :         } while (pud++, addr = next, addr != end);
    1240                 :            :         return 0;
    1241                 :            : }
    1242                 :            : 
    1243                 :          0 : static int unuse_vma(struct vm_area_struct *vma,
    1244                 :          0 :                                 swp_entry_t entry, struct page *page)
    1245                 :            : {
    1246                 :            :         pgd_t *pgd;
    1247                 :            :         unsigned long addr, end, next;
    1248                 :            :         int ret;
    1249                 :            : 
    1250         [ #  # ]:          0 :         if (page_anon_vma(page)) {
    1251                 :          0 :                 addr = page_address_in_vma(page, vma);
    1252         [ #  # ]:          0 :                 if (addr == -EFAULT)
    1253                 :            :                         return 0;
    1254                 :            :                 else
    1255                 :          0 :                         end = addr + PAGE_SIZE;
    1256                 :            :         } else {
    1257                 :          0 :                 addr = vma->vm_start;
    1258                 :          0 :                 end = vma->vm_end;
    1259                 :            :         }
    1260                 :            : 
    1261                 :          0 :         pgd = pgd_offset(vma->vm_mm, addr);
    1262                 :            :         do {
    1263         [ #  # ]:          0 :                 next = pgd_addr_end(addr, end);
    1264                 :            :                 if (pgd_none_or_clear_bad(pgd))
    1265                 :            :                         continue;
    1266                 :            :                 ret = unuse_pud_range(vma, pgd, addr, next, entry, page);
    1267         [ #  # ]:          0 :                 if (ret)
    1268                 :            :                         return ret;
    1269         [ #  # ]:          0 :         } while (pgd++, addr = next, addr != end);
    1270                 :            :         return 0;
    1271                 :            : }
    1272                 :            : 
    1273                 :          0 : static int unuse_mm(struct mm_struct *mm,
    1274                 :            :                                 swp_entry_t entry, struct page *page)
    1275                 :            : {
    1276                 :            :         struct vm_area_struct *vma;
    1277                 :            :         int ret = 0;
    1278                 :            : 
    1279         [ #  # ]:          0 :         if (!down_read_trylock(&mm->mmap_sem)) {
    1280                 :            :                 /*
    1281                 :            :                  * Activate page so shrink_inactive_list is unlikely to unmap
    1282                 :            :                  * its ptes while lock is dropped, so swapoff can make progress.
    1283                 :            :                  */
    1284                 :          0 :                 activate_page(page);
    1285                 :          0 :                 unlock_page(page);
    1286                 :          0 :                 down_read(&mm->mmap_sem);
    1287                 :            :                 lock_page(page);
    1288                 :            :         }
    1289         [ #  # ]:          0 :         for (vma = mm->mmap; vma; vma = vma->vm_next) {
    1290 [ #  # ][ #  # ]:          0 :                 if (vma->anon_vma && (ret = unuse_vma(vma, entry, page)))
    1291                 :            :                         break;
    1292                 :            :         }
    1293                 :          0 :         up_read(&mm->mmap_sem);
    1294                 :          0 :         return (ret < 0)? ret: 0;
    1295                 :            : }
    1296                 :            : 
    1297                 :            : /*
    1298                 :            :  * Scan swap_map (or frontswap_map if frontswap parameter is true)
    1299                 :            :  * from current position to next entry still in use.
    1300                 :            :  * Recycle to start on reaching the end, returning 0 when empty.
    1301                 :            :  */
    1302                 :            : static unsigned int find_next_to_unuse(struct swap_info_struct *si,
    1303                 :            :                                         unsigned int prev, bool frontswap)
    1304                 :            : {
    1305                 :            :         unsigned int max = si->max;
    1306                 :            :         unsigned int i = prev;
    1307                 :            :         unsigned char count;
    1308                 :            : 
    1309                 :            :         /*
    1310                 :            :          * No need for swap_lock here: we're just looking
    1311                 :            :          * for whether an entry is in use, not modifying it; false
    1312                 :            :          * hits are okay, and sys_swapoff() has already prevented new
    1313                 :            :          * allocations from this area (while holding swap_lock).
    1314                 :            :          */
    1315                 :            :         for (;;) {
    1316         [ +  + ]:      33088 :                 if (++i >= max) {
    1317         [ -  + ]:         34 :                         if (!prev) {
    1318                 :            :                                 i = 0;
    1319                 :            :                                 break;
    1320                 :            :                         }
    1321                 :            :                         /*
    1322                 :            :                          * No entries in use at top of swap_map,
    1323                 :            :                          * loop back to start and recheck there.
    1324                 :            :                          */
    1325                 :          0 :                         max = prev + 1;
    1326                 :            :                         prev = 0;
    1327                 :            :                         i = 1;
    1328                 :            :                 }
    1329         [ -  + ]:      33054 :                 if (frontswap) {
    1330                 :            :                         if (frontswap_test(si, i))
    1331                 :            :                                 break;
    1332                 :            :                         else
    1333                 :          0 :                                 continue;
    1334                 :            :                 }
    1335                 :      33054 :                 count = ACCESS_ONCE(si->swap_map[i]);
    1336 [ +  - ][ #  # ]:      33054 :                 if (count && swap_count(count) != SWAP_MAP_BAD)
    1337                 :            :                         break;
    1338                 :            :         }
    1339                 :            :         return i;
    1340                 :            : }
    1341                 :            : 
    1342                 :            : /*
    1343                 :            :  * We completely avoid races by reading each swap page in advance,
    1344                 :            :  * and then searching for the process using it.  All the necessary
    1345                 :            :  * page table adjustments can then be made atomically.
    1346                 :            :  *
    1347                 :            :  * If the boolean frontswap is true, only unuse pages_to_unuse pages
    1348                 :            :  * (0 means all pages); pages_to_unuse is ignored if frontswap is false.
    1349                 :            :  */
    1350                 :          0 : int try_to_unuse(unsigned int type, bool frontswap,
    1351                 :            :                  unsigned long pages_to_unuse)
    1352                 :            : {
    1353                 :      33122 :         struct swap_info_struct *si = swap_info[type];
    1354                 :            :         struct mm_struct *start_mm;
    1355                 :            :         volatile unsigned char *swap_map; /* swap_map is accessed without
    1356                 :            :                                            * locking. Mark it as volatile
    1357                 :            :                                            * to prevent compiler doing
    1358                 :            :                                            * something odd.
    1359                 :            :                                            */
    1360                 :            :         unsigned char swcount;
    1361                 :            :         struct page *page;
    1362                 :            :         swp_entry_t entry;
    1363                 :            :         unsigned int i = 0;
    1364                 :            :         int retval = 0;
    1365                 :            : 
    1366                 :            :         /*
    1367                 :            :          * When searching mms for an entry, a good strategy is to
    1368                 :            :          * start at the first mm we freed the previous entry from
    1369                 :            :          * (though actually we don't notice whether we or coincidence
    1370                 :            :          * freed the entry).  Initialize this start_mm with a hold.
    1371                 :            :          *
    1372                 :            :          * A simpler strategy would be to start at the last mm we
    1373                 :            :          * freed the previous entry from; but that would take less
    1374                 :            :          * advantage of mmlist ordering, which clusters forked mms
    1375                 :            :          * together, child after parent.  If we race with dup_mmap(), we
    1376                 :            :          * prefer to resolve parent before child, lest we miss entries
    1377                 :            :          * duplicated after we scanned child: using last mm would invert
    1378                 :            :          * that.
    1379                 :            :          */
    1380                 :            :         start_mm = &init_mm;
    1381                 :            :         atomic_inc(&init_mm.mm_users);
    1382                 :            : 
    1383                 :            :         /*
    1384                 :            :          * Keep on scanning until all entries have gone.  Usually,
    1385                 :            :          * one pass through swap_map is enough, but not necessarily:
    1386                 :            :          * there are races when an instance of an entry might be missed.
    1387                 :            :          */
    1388         [ -  + ]:         34 :         while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
    1389         [ #  # ]:          0 :                 if (signal_pending(current)) {
    1390                 :            :                         retval = -EINTR;
    1391                 :            :                         break;
    1392                 :            :                 }
    1393                 :            : 
    1394                 :            :                 /*
    1395                 :            :                  * Get a page for the entry, using the existing swap
    1396                 :            :                  * cache page if there is one.  Otherwise, get a clean
    1397                 :            :                  * page and read the swap into it.
    1398                 :            :                  */
    1399                 :          0 :                 swap_map = &si->swap_map[i];
    1400                 :            :                 entry = swp_entry(type, i);
    1401                 :          0 :                 page = read_swap_cache_async(entry,
    1402                 :            :                                         GFP_HIGHUSER_MOVABLE, NULL, 0);
    1403         [ #  # ]:          0 :                 if (!page) {
    1404                 :            :                         /*
    1405                 :            :                          * Either swap_duplicate() failed because entry
    1406                 :            :                          * has been freed independently, and will not be
    1407                 :            :                          * reused since sys_swapoff() already disabled
    1408                 :            :                          * allocation from here, or alloc_page() failed.
    1409                 :            :                          */
    1410                 :          0 :                         swcount = *swap_map;
    1411                 :            :                         /*
    1412                 :            :                          * We don't hold the lock here, so the swap entry could
    1413                 :            :                          * be SWAP_MAP_BAD (when the cluster is discarding).
    1414                 :            :                          * Instead of failing out, we can just skip the swap
    1415                 :            :                          * entry because swapoff will wait for discarding
    1416                 :            :                          * to finish anyway.
    1417                 :            :                          */
    1418         [ #  # ]:          0 :                         if (!swcount || swcount == SWAP_MAP_BAD)
    1419                 :          0 :                                 continue;
    1420                 :            :                         retval = -ENOMEM;
    1421                 :            :                         break;
    1422                 :            :                 }
    1423                 :            : 
    1424                 :            :                 /*
    1425                 :            :                  * Don't hold on to start_mm if it looks like exiting.
    1426                 :            :                  */
    1427         [ #  # ]:          0 :                 if (atomic_read(&start_mm->mm_users) == 1) {
    1428                 :          0 :                         mmput(start_mm);
    1429                 :            :                         start_mm = &init_mm;
    1430                 :            :                         atomic_inc(&init_mm.mm_users);
    1431                 :            :                 }
    1432                 :            : 
    1433                 :            :                 /*
    1434                 :            :                  * Wait for and lock page.  When do_swap_page races with
    1435                 :            :                  * try_to_unuse, do_swap_page can handle the fault much
    1436                 :            :                  * faster than try_to_unuse can locate the entry.  This
    1437                 :            :                  * apparently redundant "wait_on_page_locked" lets try_to_unuse
    1438                 :            :                  * defer to do_swap_page in such a case - in some tests,
    1439                 :            :                  * do_swap_page and try_to_unuse repeatedly compete.
    1440                 :            :                  */
    1441                 :            :                 wait_on_page_locked(page);
    1442                 :            :                 wait_on_page_writeback(page);
    1443                 :            :                 lock_page(page);
    1444                 :            :                 wait_on_page_writeback(page);
    1445                 :            : 
    1446                 :            :                 /*
    1447                 :            :                  * Remove all references to entry.
    1448                 :            :                  */
    1449                 :          0 :                 swcount = *swap_map;
    1450         [ #  # ]:          0 :                 if (swap_count(swcount) == SWAP_MAP_SHMEM) {
    1451                 :          0 :                         retval = shmem_unuse(entry, page);
    1452                 :            :                         /* page has already been unlocked and released */
    1453         [ #  # ]:          0 :                         if (retval < 0)
    1454                 :            :                                 break;
    1455                 :          0 :                         continue;
    1456                 :            :                 }
    1457 [ #  # ][ #  # ]:          0 :                 if (swap_count(swcount) && start_mm != &init_mm)
    1458                 :          0 :                         retval = unuse_mm(start_mm, entry, page);
    1459                 :            : 
    1460         [ #  # ]:          0 :                 if (swap_count(*swap_map)) {
    1461                 :          0 :                         int set_start_mm = (*swap_map >= swcount);
    1462                 :          0 :                         struct list_head *p = &start_mm->mmlist;
    1463                 :            :                         struct mm_struct *new_start_mm = start_mm;
    1464                 :            :                         struct mm_struct *prev_mm = start_mm;
    1465                 :            :                         struct mm_struct *mm;
    1466                 :            : 
    1467                 :          0 :                         atomic_inc(&new_start_mm->mm_users);
    1468                 :            :                         atomic_inc(&prev_mm->mm_users);
    1469                 :            :                         spin_lock(&mmlist_lock);
    1470 [ #  # ][ #  # ]:          0 :                         while (swap_count(*swap_map) && !retval &&
                 [ #  # ]
    1471                 :          0 :                                         (p = p->next) != &start_mm->mmlist) {
    1472                 :          0 :                                 mm = list_entry(p, struct mm_struct, mmlist);
    1473         [ #  # ]:          0 :                                 if (!atomic_inc_not_zero(&mm->mm_users))
    1474                 :          0 :                                         continue;
    1475                 :            :                                 spin_unlock(&mmlist_lock);
    1476                 :          0 :                                 mmput(prev_mm);
    1477                 :            :                                 prev_mm = mm;
    1478                 :            : 
    1479                 :          0 :                                 cond_resched();
    1480                 :            : 
    1481                 :          0 :                                 swcount = *swap_map;
    1482         [ #  # ]:          0 :                                 if (!swap_count(swcount)) /* any usage ? */
    1483                 :            :                                         ;
    1484         [ #  # ]:          0 :                                 else if (mm == &init_mm)
    1485                 :            :                                         set_start_mm = 1;
    1486                 :            :                                 else
    1487                 :          0 :                                         retval = unuse_mm(mm, entry, page);
    1488                 :            : 
    1489 [ #  # ][ #  # ]:          0 :                                 if (set_start_mm && *swap_map < swcount) {
    1490                 :          0 :                                         mmput(new_start_mm);
    1491                 :            :                                         atomic_inc(&mm->mm_users);
    1492                 :            :                                         new_start_mm = mm;
    1493                 :            :                                         set_start_mm = 0;
    1494                 :            :                                 }
    1495                 :            :                                 spin_lock(&mmlist_lock);
    1496                 :            :                         }
    1497                 :            :                         spin_unlock(&mmlist_lock);
    1498                 :          0 :                         mmput(prev_mm);
    1499                 :          0 :                         mmput(start_mm);
    1500                 :            :                         start_mm = new_start_mm;
    1501                 :            :                 }
    1502         [ #  # ]:          0 :                 if (retval) {
    1503                 :          0 :                         unlock_page(page);
    1504                 :          0 :                         page_cache_release(page);
    1505                 :          0 :                         break;
    1506                 :            :                 }
    1507                 :            : 
    1508                 :            :                 /*
    1509                 :            :                  * If a reference remains (rare), we would like to leave
    1510                 :            :                  * the page in the swap cache; but try_to_unmap could
    1511                 :            :                  * then re-duplicate the entry once we drop page lock,
    1512                 :            :                  * so we might loop indefinitely; also, that page could
    1513                 :            :                  * not be swapped out to other storage meanwhile.  So:
    1514                 :            :                  * delete from cache even if there's another reference,
    1515                 :            :                  * after ensuring that the data has been saved to disk -
    1516                 :            :                  * since if the reference remains (rarer), it will be
    1517                 :            :                  * read from disk into another page.  Splitting into two
    1518                 :            :                  * pages would be incorrect if swap supported "shared
    1519                 :            :                  * private" pages, but they are handled by tmpfs files.
    1520                 :            :                  *
    1521                 :            :                  * Given how unuse_vma() targets one particular offset
    1522                 :            :                  * in an anon_vma, once the anon_vma has been determined,
    1523                 :            :                  * this splitting happens to be just what is needed to
    1524                 :            :                  * handle where KSM pages have been swapped out: re-reading
    1525                 :            :                  * is unnecessarily slow, but we can fix that later on.
    1526                 :            :                  */
    1527 [ #  # ][ #  # ]:          0 :                 if (swap_count(*swap_map) &&
    1528         [ #  # ]:          0 :                      PageDirty(page) && PageSwapCache(page)) {
    1529                 :          0 :                         struct writeback_control wbc = {
    1530                 :            :                                 .sync_mode = WB_SYNC_NONE,
    1531                 :            :                         };
    1532                 :            : 
    1533                 :          0 :                         swap_writepage(page, &wbc);
    1534                 :            :                         lock_page(page);
    1535                 :            :                         wait_on_page_writeback(page);
    1536                 :            :                 }
    1537                 :            : 
    1538                 :            :                 /*
    1539                 :            :                  * It is conceivable that a racing task removed this page from
    1540                 :            :                  * swap cache just before we acquired the page lock at the top,
    1541                 :            :                  * or while we dropped it in unuse_mm().  The page might even
    1542                 :            :                  * be back in swap cache on another swap area: that we must not
    1543                 :            :                  * delete, since it may not have been written out to swap yet.
    1544                 :            :                  */
    1545 [ #  # ][ #  # ]:          0 :                 if (PageSwapCache(page) &&
    1546                 :          0 :                     likely(page_private(page) == entry.val))
    1547                 :          0 :                         delete_from_swap_cache(page);
    1548                 :            : 
    1549                 :            :                 /*
    1550                 :            :                  * So we could skip searching mms once swap count went
    1551                 :            :                  * to 1, we did not mark any present ptes as dirty: must
    1552                 :            :                  * mark page dirty so shrink_page_list will preserve it.
    1553                 :            :                  */
    1554                 :            :                 SetPageDirty(page);
    1555                 :          0 :                 unlock_page(page);
    1556                 :          0 :                 page_cache_release(page);
    1557                 :            : 
    1558                 :            :                 /*
    1559                 :            :                  * Make sure that we aren't completely killing
    1560                 :            :                  * interactive performance.
    1561                 :            :                  */
    1562                 :          0 :                 cond_resched();
    1563         [ #  # ]:          0 :                 if (frontswap && pages_to_unuse > 0) {
    1564         [ #  # ]:          0 :                         if (!--pages_to_unuse)
    1565                 :            :                                 break;
    1566                 :            :                 }
    1567                 :            :         }
    1568                 :            : 
    1569                 :         34 :         mmput(start_mm);
    1570                 :         34 :         return retval;
    1571                 :            : }
    1572                 :            : 
    1573                 :            : /*
    1574                 :            :  * After a successful try_to_unuse, if no swap is now in use, we know
    1575                 :            :  * we can empty the mmlist.  swap_lock must be held on entry and exit.
    1576                 :            :  * Note that mmlist_lock nests inside swap_lock, and an mm must be
    1577                 :            :  * added to the mmlist just after page_duplicate - before would be racy.
    1578                 :            :  */
    1579                 :          0 : static void drain_mmlist(void)
    1580                 :            : {
    1581                 :            :         struct list_head *p, *next;
    1582                 :            :         unsigned int type;
    1583                 :            : 
    1584         [ +  + ]:        939 :         for (type = 0; type < nr_swapfiles; type++)
    1585         [ +  - ]:        905 :                 if (swap_info[type]->inuse_pages)
    1586                 :         34 :                         return;
    1587                 :            :         spin_lock(&mmlist_lock);
    1588         [ -  + ]:         68 :         list_for_each_safe(p, next, &init_mm.mmlist)
    1589                 :            :                 list_del_init(p);
    1590                 :            :         spin_unlock(&mmlist_lock);
    1591                 :            : }
    1592                 :            : 
    1593                 :            : /*
    1594                 :            :  * Use this swapdev's extent info to locate the (PAGE_SIZE) block which
    1595                 :            :  * corresponds to page offset for the specified swap entry.
    1596                 :            :  * Note that the type of this function is sector_t, but it returns page offset
    1597                 :            :  * into the bdev, not sector offset.
    1598                 :            :  */
    1599                 :          0 : static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
    1600                 :            : {
    1601                 :            :         struct swap_info_struct *sis;
    1602                 :            :         struct swap_extent *start_se;
    1603                 :            :         struct swap_extent *se;
    1604                 :            :         pgoff_t offset;
    1605                 :            : 
    1606                 :          0 :         sis = swap_info[swp_type(entry)];
    1607                 :          0 :         *bdev = sis->bdev;
    1608                 :            : 
    1609                 :            :         offset = swp_offset(entry);
    1610                 :          0 :         start_se = sis->curr_swap_extent;
    1611                 :            :         se = start_se;
    1612                 :            : 
    1613                 :            :         for ( ; ; ) {
    1614                 :            :                 struct list_head *lh;
    1615                 :            : 
    1616 [ #  # ][ #  # ]:          0 :                 if (se->start_page <= offset &&
    1617                 :          0 :                                 offset < (se->start_page + se->nr_pages)) {
    1618                 :          0 :                         return se->start_block + (offset - se->start_page);
    1619                 :            :                 }
    1620                 :          0 :                 lh = se->list.next;
    1621                 :            :                 se = list_entry(lh, struct swap_extent, list);
    1622                 :          0 :                 sis->curr_swap_extent = se;
    1623         [ #  # ]:          0 :                 BUG_ON(se == start_se);         /* It *must* be present */
    1624                 :            :         }
    1625                 :            : }
    1626                 :            : 
    1627                 :            : /*
    1628                 :            :  * Returns the page offset into bdev for the specified page's swap entry.
    1629                 :            :  */
    1630                 :          0 : sector_t map_swap_page(struct page *page, struct block_device **bdev)
    1631                 :            : {
    1632                 :            :         swp_entry_t entry;
    1633                 :          0 :         entry.val = page_private(page);
    1634                 :          0 :         return map_swap_entry(entry, bdev);
    1635                 :            : }
    1636                 :            : 
    1637                 :            : /*
    1638                 :            :  * Free all of a swapdev's extent information
    1639                 :            :  */
    1640                 :          0 : static void destroy_swap_extents(struct swap_info_struct *sis)
    1641                 :            : {
    1642         [ -  + ]:         37 :         while (!list_empty(&sis->first_swap_extent.list)) {
    1643                 :            :                 struct swap_extent *se;
    1644                 :            : 
    1645                 :            :                 se = list_entry(sis->first_swap_extent.list.next,
    1646                 :            :                                 struct swap_extent, list);
    1647                 :            :                 list_del(&se->list);
    1648                 :          0 :                 kfree(se);
    1649                 :            :         }
    1650                 :            : 
    1651         [ -  + ]:         37 :         if (sis->flags & SWP_FILE) {
    1652                 :          0 :                 struct file *swap_file = sis->swap_file;
    1653                 :          0 :                 struct address_space *mapping = swap_file->f_mapping;
    1654                 :            : 
    1655                 :          0 :                 sis->flags &= ~SWP_FILE;
    1656                 :          0 :                 mapping->a_ops->swap_deactivate(swap_file);
    1657                 :            :         }
    1658                 :          0 : }
    1659                 :            : 
    1660                 :            : /*
    1661                 :            :  * Add a block range (and the corresponding page range) into this swapdev's
    1662                 :            :  * extent list.  The extent list is kept sorted in page order.
    1663                 :            :  *
    1664                 :            :  * This function rather assumes that it is called in ascending page order.
                                         *
                                         * @sis:         swap area that owns the extent list
                                         * @start_page:  first page covered by the new range
                                         * @nr_pages:    number of pages in the range
                                         * @start_block: first block backing @start_page
                                         *
                                         * start_page == 0 (re)initialises the embedded first_swap_extent and points
                                         * curr_swap_extent at it.  Any later range must begin exactly where the
                                         * highest extent ends (BUG_ON otherwise); it is merged into that extent
                                         * when it is also contiguous on disk, else a new extent is allocated and
                                         * appended to the tail of the list.
                                         *
                                         * Returns 1 if a new extent was recorded, 0 if the range was merged into
                                         * the existing highest extent, or -ENOMEM if the allocation failed.
    1665                 :            :  */
    1666                 :            : int
    1667                 :          0 : add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
    1668                 :            :                 unsigned long nr_pages, sector_t start_block)
    1669                 :            : {
    1670                 :            :         struct swap_extent *se;
    1671                 :            :         struct swap_extent *new_se;
    1672                 :            :         struct list_head *lh;
    1673                 :            : 
    1674         [ +  + ]:      33088 :         if (start_page == 0) {
                                                        /* First range: use the extent embedded in sis, no allocation. */
    1675                 :         34 :                 se = &sis->first_swap_extent;
    1676                 :         34 :                 sis->curr_swap_extent = se;
    1677                 :         34 :                 se->start_page = 0;
    1678                 :         34 :                 se->nr_pages = nr_pages;
    1679                 :         34 :                 se->start_block = start_block;
    1680                 :         34 :                 return 1;
    1681                 :            :         } else {
    1682                 :      33054 :                 lh = sis->first_swap_extent.list.prev;       /* Highest extent */
    1683                 :            :                 se = list_entry(lh, struct swap_extent, list);
                                                        /* The new range must start on the page right after the highest extent. */
    1684         [ -  + ]:      33054 :                 BUG_ON(se->start_page + se->nr_pages != start_page);
    1685         [ +  - ]:      33054 :                 if (se->start_block + se->nr_pages == start_block) {
    1686                 :            :                         /* Merge it */
    1687                 :      33054 :                         se->nr_pages += nr_pages;
    1688                 :      33054 :                         return 0;
    1689                 :            :                 }
    1690                 :            :         }
    1691                 :            : 
    1692                 :            :         /*
    1693                 :            :          * No merge.  Insert a new extent, preserving ordering.
    1694                 :            :          */
                                                /* sizeof(*se) == sizeof(*new_se): both point to struct swap_extent. */
    1695                 :            :         new_se = kmalloc(sizeof(*se), GFP_KERNEL);
    1696         [ #  # ]:          0 :         if (new_se == NULL)
    1697                 :            :                 return -ENOMEM;
    1698                 :          0 :         new_se->start_page = start_page;
    1699                 :          0 :         new_se->nr_pages = nr_pages;
    1700                 :          0 :         new_se->start_block = start_block;
    1701                 :            : 
                                                /* Tail of the list anchored at first_swap_extent == highest page range. */
    1702                 :          0 :         list_add_tail(&new_se->list, &sis->first_swap_extent.list);
    1703                 :          0 :         return 1;
    1704                 :            : }
    1705                 :            : 
    1706                 :            : /*
    1707                 :            :  * A `swap extent' is a simple thing which maps a contiguous range of pages
    1708                 :            :  * onto a contiguous range of disk blocks.  An ordered list of swap extents
    1709                 :            :  * is built at swapon time and is then used at swap_writepage/swap_readpage
    1710                 :            :  * time for locating where on disk a page belongs.
    1711                 :            :  *
    1712                 :            :  * If the swapfile is an S_ISBLK block device, a single extent is installed.
    1713                 :            :  * This is done so that the main operating code can treat S_ISBLK and S_ISREG
    1714                 :            :  * swap files identically.
    1715                 :            :  *
    1716                 :            :  * Whether the swapdev is an S_ISREG file or an S_ISBLK blockdev, the swap
    1717                 :            :  * extent list operates in PAGE_SIZE disk blocks.  Both S_ISREG and S_ISBLK
    1718                 :            :  * swapfiles are handled *identically* after swapon time.
    1719                 :            :  *
    1720                 :            :  * For S_ISREG swapfiles, setup_swap_extents() will walk all the file's blocks
    1721                 :            :  * and will parse them into an ordered extent list, in PAGE_SIZE chunks.  If
    1722                 :            :  * some stray blocks are found which do not fall within the PAGE_SIZE alignment
    1723                 :            :  * requirements, they are simply tossed out - we will never use those blocks
    1724                 :            :  * for swapping.
    1725                 :            :  *
    1726                 :            :  * For S_ISREG swapfiles we set S_SWAPFILE across the life of the swapon.  This
    1727                 :            :  * prevents root from shooting her foot off by ftruncating an in-use swapfile,
    1728                 :            :  * which will scribble on the fs.
    1729                 :            :  *
    1730                 :            :  * The amount of disk space which a single swap extent represents varies.
    1731                 :            :  * Typically it is in the 1-4 megabyte range.  So we can have hundreds of
    1732                 :            :  * extents in the list.  To avoid much list walking, we cache the previous
    1733                 :            :  * search location in `curr_swap_extent', and start new searches from there.
    1734                 :            :  * This is extremely effective.  The average number of iterations in
    1735                 :            :  * map_swap_page() has been measured at about 0.3 per page.  - akpm.
                                         *
                                         * @sis:  swap area being set up; sis->swap_file selects the backing object
                                         * @span: out parameter; set to sis->pages on the two single-extent paths
                                         *        below, otherwise handed on to generic_swapfile_activate()
                                         *
                                         * Returns the add_swap_extent() result on the single-extent paths, the
                                         * non-zero value from ->swap_activate() if that hook fails, or whatever
                                         * generic_swapfile_activate() returns.
    1736                 :            :  */
    1737                 :          0 : static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
    1738                 :            : {
    1739                 :         34 :         struct file *swap_file = sis->swap_file;
    1740                 :         34 :         struct address_space *mapping = swap_file->f_mapping;
    1741                 :         34 :         struct inode *inode = mapping->host;
    1742                 :            :         int ret;
    1743                 :            : 
                                                /* Block device: one extent mapping page n to block n for all sis->max pages. */
    1744         [ -  + ]:         34 :         if (S_ISBLK(inode->i_mode)) {
    1745                 :          0 :                 ret = add_swap_extent(sis, 0, sis->max, 0);
    1746                 :          0 :                 *span = sis->pages;
    1747                 :          0 :                 return ret;
    1748                 :            :         }
    1749                 :            : 
                                                /*
                                                 * The address_space provides a ->swap_activate() hook: call it and,
                                                 * if it returns 0, flag the area SWP_FILE and install the same single
                                                 * page-n-to-block-n extent as for a block device.
                                                 */
    1750         [ -  + ]:         34 :         if (mapping->a_ops->swap_activate) {
    1751                 :          0 :                 ret = mapping->a_ops->swap_activate(sis, swap_file, span);
    1752         [ #  # ]:         34 :                 if (!ret) {
    1753                 :          0 :                         sis->flags |= SWP_FILE;
    1754                 :          0 :                         ret = add_swap_extent(sis, 0, sis->max, 0);
    1755                 :          0 :                         *span = sis->pages;
    1756                 :            :                 }
    1757                 :          0 :                 return ret;
    1758                 :            :         }
    1759                 :            : 
                                                /* No hook: let generic_swapfile_activate() build the extent list. */
    1760                 :         34 :         return generic_swapfile_activate(sis, swap_file, span);
    1761                 :            : }
    1762                 :            : 
                                        /*
                                         * Make swap area @p usable and link it into swap_list.
                                         *
                                         * @prio:         requested priority; a negative value means "take the next
                                         *                value below least_priority" (least_priority is
                                         *                pre-decremented and used)
                                         * @swap_map:     map to install in p->swap_map
                                         * @cluster_info: cluster table to install in p->cluster_info
                                         *
                                         * Sets SWP_WRITEOK, adds p->pages to nr_swap_pages and total_swap_pages,
                                         * and inserts p->type into the ->next chain that starts at swap_list.head,
                                         * in front of the first entry whose priority is not higher than p's.
                                         *
                                         * The callers visible here (enable_swap_info(), reinsert_swap_info()) hold
                                         * swap_lock and p->lock around the call.
                                         */
    1763                 :          0 : static void _enable_swap_info(struct swap_info_struct *p, int prio,
    1764                 :            :                                 unsigned char *swap_map,
    1765                 :            :                                 struct swap_cluster_info *cluster_info)
    1766                 :            : {
    1767                 :            :         int i, prev;
    1768                 :            : 
    1769         [ -  + ]:         34 :         if (prio >= 0)
    1770                 :          0 :                 p->prio = prio;
    1771                 :            :         else
    1772                 :         34 :                 p->prio = --least_priority;
    1773                 :         34 :         p->swap_map = swap_map;
    1774                 :         34 :         p->cluster_info = cluster_info;
    1775                 :         34 :         p->flags |= SWP_WRITEOK;
    1776                 :         34 :         atomic_long_add(p->pages, &nr_swap_pages);
    1777                 :         68 :         total_swap_pages += p->pages;
    1778                 :            : 
    1779                 :            :         /* insert swap space into swap_list: */
                                                /* prev ends up as the index of the entry p is linked after, or -1 for the head. */
    1780                 :            :         prev = -1;
    1781         [ +  + ]:        503 :         for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
    1782         [ +  - ]:        435 :                 if (p->prio >= swap_info[i]->prio)
    1783                 :            :                         break;
    1784                 :            :                 prev = i;
    1785                 :            :         }
    1786                 :         34 :         p->next = i;
                                                /* New head: swap_list.next is pointed at p as well. */
    1787         [ +  + ]:         34 :         if (prev < 0)
    1788                 :          5 :                 swap_list.head = swap_list.next = p->type;
    1789                 :            :         else
    1790                 :         29 :                 swap_info[prev]->next = p->type;
    1791                 :         34 : }
    1792                 :            : 
                                        /*
                                         * Activate swap area @p: hand @frontswap_map to frontswap_init() for
                                         * p->type, then run _enable_swap_info() with swap_lock and p->lock held
                                         * (taken in that order, released in the reverse order).
                                         */
    1793                 :         34 : static void enable_swap_info(struct swap_info_struct *p, int prio,
    1794                 :            :                                 unsigned char *swap_map,
    1795                 :            :                                 struct swap_cluster_info *cluster_info,
    1796                 :            :                                 unsigned long *frontswap_map)
    1797                 :            : {
                                                /* frontswap_init() is called before any lock is taken. */
    1798                 :            :         frontswap_init(p->type, frontswap_map);
    1799                 :            :         spin_lock(&swap_lock);
    1800                 :            :         spin_lock(&p->lock);
    1801                 :         34 :          _enable_swap_info(p, prio, swap_map, cluster_info);
    1802                 :            :         spin_unlock(&p->lock);
    1803                 :            :         spin_unlock(&swap_lock);
    1804                 :         34 : }
    1805                 :            : 
                                        /*
                                         * Put @p back on swap_list using the prio, swap_map and cluster_info it
                                         * already carries.  swapoff calls this when try_to_unuse() fails, to undo
                                         * its earlier removal of @p from the list.
                                         */
    1806                 :          0 : static void reinsert_swap_info(struct swap_info_struct *p)
    1807                 :            : {
    1808                 :            :         spin_lock(&swap_lock);
    1809                 :            :         spin_lock(&p->lock);
    1810                 :          0 :         _enable_swap_info(p, p->prio, p->swap_map, p->cluster_info);
    1811                 :            :         spin_unlock(&p->lock);
    1812                 :            :         spin_unlock(&swap_lock);
    1813                 :          0 : }
    1814                 :            : 
                                        /*
                                         * swapoff system call: stop swapping on the file or device named by
                                         * @specialfile.
                                         *
                                         * The path is opened and matched, by address_space, against the active
                                         * (SWP_WRITEOK) entries on swap_list.  The matching area is unlinked from
                                         * the list and try_to_unuse() is run on it; if that fails the area is put
                                         * back with reinsert_swap_info().  On success the extents, count
                                         * continuations, swap_map, cluster_info, frontswap map and per-cpu cluster
                                         * data are released and the backing file is closed.
                                         *
                                         * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, the getname() or
                                         * file_open_name() error if the path cannot be opened, -EINVAL if no
                                         * active swap area matches, -ENOMEM if the memory check refuses
                                         * p->pages, or the error returned by try_to_unuse().
                                         */
    1815                 :          0 : SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
    1816                 :            : {
    1817                 :            :         struct swap_info_struct *p = NULL;
    1818                 :            :         unsigned char *swap_map;
    1819                 :            :         struct swap_cluster_info *cluster_info;
    1820                 :            :         unsigned long *frontswap_map;
    1821                 :            :         struct file *swap_file, *victim;
    1822                 :            :         struct address_space *mapping;
    1823                 :            :         struct inode *inode;
    1824                 :            :         struct filename *pathname;
    1825                 :            :         int i, type, prev;
    1826                 :            :         int err;
    1827                 :            :         unsigned int old_block_size;
    1828                 :            : 
    1829         [ +  + ]:         37 :         if (!capable(CAP_SYS_ADMIN))
    1830                 :            :                 return -EPERM;
    1831                 :            : 
    1832         [ -  + ]:         36 :         BUG_ON(!current->mm);
    1833                 :            : 
    1834                 :         36 :         pathname = getname(specialfile);
    1835         [ -  + ]:         36 :         if (IS_ERR(pathname))
    1836                 :            :                 return PTR_ERR(pathname);
    1837                 :            : 
    1838                 :         36 :         victim = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0);
    1839                 :            :         err = PTR_ERR(victim);
    1840         [ +  + ]:         36 :         if (IS_ERR(victim))
    1841                 :            :                 goto out;
    1842                 :            : 
                                                /*
                                                 * Find the active (SWP_WRITEOK) swap area that shares victim's
                                                 * mapping.  prev tracks the predecessor so p can be unlinked below.
                                                 */
    1843                 :         35 :         mapping = victim->f_mapping;
    1844                 :            :         prev = -1;
    1845                 :            :         spin_lock(&swap_lock);
    1846         [ +  + ]:         36 :         for (type = swap_list.head; type >= 0; type = swap_info[type]->next) {
    1847                 :         35 :                 p = swap_info[type];
    1848         [ +  - ]:         35 :                 if (p->flags & SWP_WRITEOK) {
    1849         [ +  + ]:         35 :                         if (p->swap_file->f_mapping == mapping)
    1850                 :            :                                 break;
    1851                 :            :                 }
    1852                 :            :                 prev = type;
    1853                 :            :         }
    1854         [ +  + ]:         35 :         if (type < 0) {
    1855                 :            :                 err = -EINVAL;
    1856                 :            :                 spin_unlock(&swap_lock);
    1857                 :            :                 goto out_dput;
    1858                 :            :         }
                                                /*
                                                 * A zero return lets swapoff proceed, and vm_unacct_memory() is then
                                                 * called for the same page count at once.  NOTE(review): presumably
                                                 * the check charges p->pages and this releases the charge again -
                                                 * confirm in security_vm_enough_memory_mm().
                                                 */
    1859         [ +  - ]:         34 :         if (!security_vm_enough_memory_mm(current->mm, p->pages))
    1860                 :         34 :                 vm_unacct_memory(p->pages);
    1861                 :            :         else {
    1862                 :            :                 err = -ENOMEM;
    1863                 :            :                 spin_unlock(&swap_lock);
    1864                 :            :                 goto out_dput;
    1865                 :            :         }
                                                /* Unlink p from the swap_list chain. */
    1866         [ +  - ]:         34 :         if (prev < 0)
    1867                 :         34 :                 swap_list.head = p->next;
    1868                 :            :         else
    1869                 :          0 :                 swap_info[prev]->next = p->next;
    1870         [ +  - ]:         34 :         if (type == swap_list.next) {
    1871                 :            :                 /* just pick something that's safe... */
    1872                 :         34 :                 swap_list.next = swap_list.head;
    1873                 :            :         }
    1874                 :            :         spin_lock(&p->lock);
                                                /*
                                                 * Negative priority: every entry after p takes over, in turn, p's
                                                 * priority, p's priority minus one, and so on, and least_priority is
                                                 * raised by one.  This clobbers p->prio, but it stays negative, and
                                                 * _enable_swap_info() replaces a negative prio with --least_priority.
                                                 */
    1875         [ +  - ]:         34 :         if (p->prio < 0) {
    1876         [ +  + ]:        469 :                 for (i = p->next; i >= 0; i = swap_info[i]->next)
    1877                 :        435 :                         swap_info[i]->prio = p->prio--;
    1878                 :         34 :                 least_priority++;
    1879                 :            :         }
                                                /* Reverse the accounting done by _enable_swap_info(). */
    1880                 :          0 :         atomic_long_sub(p->pages, &nr_swap_pages);
    1881                 :         34 :         total_swap_pages -= p->pages;
    1882                 :         34 :         p->flags &= ~SWP_WRITEOK;
    1883                 :            :         spin_unlock(&p->lock);
    1884                 :            :         spin_unlock(&swap_lock);
    1885                 :            : 
    1886                 :            :         set_current_oom_origin();
    1887                 :         34 :         err = try_to_unuse(type, false, 0); /* force all pages to be unused */
    1888                 :            :         clear_current_oom_origin();
    1889                 :            : 
    1890         [ -  + ]:         34 :         if (err) {
    1891                 :            :                 /* re-insert swap space back into swap_list */
    1892                 :          0 :                 reinsert_swap_info(p);
    1893                 :            :                 goto out_dput;
    1894                 :            :         }
    1895                 :            : 
    1896                 :         34 :         flush_work(&p->discard_work);
    1897                 :            : 
    1898                 :         34 :         destroy_swap_extents(p);
    1899         [ -  + ]:         34 :         if (p->flags & SWP_CONTINUED)
    1900                 :          0 :                 free_swap_count_continuations(p);
    1901                 :            : 
                                                /* Lock order from here on: swapon_mutex, then swap_lock, then p->lock. */
    1902                 :         34 :         mutex_lock(&swapon_mutex);
    1903                 :            :         spin_lock(&swap_lock);
    1904                 :            :         spin_lock(&p->lock);
    1905                 :         34 :         drain_mmlist();
    1906                 :            : 
    1907                 :            :         /* wait for anyone still in scan_swap_map */
    1908                 :         34 :         p->highest_bit = 0;          /* cuts scans short */
                                                /*
                                                 * Both spinlocks are dropped around the sleep (timeout of 1) and
                                                 * re-taken before flags is tested again.  NOTE(review): the >= test
                                                 * presumably relies on SWP_SCANNING sitting above every other flag
                                                 * bit - confirm against the SWP_* definitions.
                                                 */
    1909         [ -  + ]:         34 :         while (p->flags >= SWP_SCANNING) {
    1910                 :            :                 spin_unlock(&p->lock);
    1911                 :            :                 spin_unlock(&swap_lock);
    1912                 :          0 :                 schedule_timeout_uninterruptible(1);
    1913                 :            :                 spin_lock(&swap_lock);
    1914                 :            :                 spin_lock(&p->lock);
    1915                 :            :         }
    1916                 :            : 
                                                /*
                                                 * Detach the resources from p while the locks are held; they are
                                                 * freed below, after the locks have been dropped.
                                                 */
    1917                 :         34 :         swap_file = p->swap_file;
    1918                 :         34 :         old_block_size = p->old_block_size;
    1919                 :         34 :         p->swap_file = NULL;
    1920                 :         34 :         p->max = 0;
    1921                 :         34 :         swap_map = p->swap_map;
    1922                 :         34 :         p->swap_map = NULL;
    1923                 :         34 :         cluster_info = p->cluster_info;
    1924                 :         34 :         p->cluster_info = NULL;
    1925                 :         34 :         p->flags = 0;
    1926                 :            :         frontswap_map = frontswap_map_get(p);
    1927                 :            :         spin_unlock(&p->lock);
    1928                 :            :         spin_unlock(&swap_lock);
    1929                 :            :         frontswap_invalidate_area(type);
    1930                 :            :         frontswap_map_set(p, NULL);
    1931                 :         34 :         mutex_unlock(&swapon_mutex);
    1932                 :         34 :         free_percpu(p->percpu_cluster);
    1933                 :         34 :         p->percpu_cluster = NULL;
    1934                 :         34 :         vfree(swap_map);
    1935                 :         34 :         vfree(cluster_info);
    1936                 :         34 :         vfree(frontswap_map);
    1937                 :            :         /* Destroy swap account information */
    1938                 :            :         swap_cgroup_swapoff(type);
    1939                 :            : 
                                                /*
                                                 * Block device: put back the block size saved in p->old_block_size
                                                 * and drop the device.  Anything else: clear S_SWAPFILE on the inode
                                                 * under i_mutex.
                                                 */
    1940                 :         34 :         inode = mapping->host;
    1941         [ -  + ]:         34 :         if (S_ISBLK(inode->i_mode)) {
    1942                 :          0 :                 struct block_device *bdev = I_BDEV(inode);
    1943                 :          0 :                 set_blocksize(bdev, old_block_size);
    1944                 :          0 :                 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
    1945                 :            :         } else {
    1946                 :         34 :                 mutex_lock(&inode->i_mutex);
    1947                 :         34 :                 inode->i_flags &= ~S_SWAPFILE;
    1948                 :         34 :                 mutex_unlock(&inode->i_mutex);
    1949                 :            :         }
                                                /* Close the file reference that was held in p->swap_file. */
    1950                 :         34 :         filp_close(swap_file, NULL);
    1951                 :            :         err = 0;
                                                /* Bump the event counter and wake anyone waiting via swaps_poll(). */
    1952                 :            :         atomic_inc(&proc_poll_event);
    1953                 :         34 :         wake_up_interruptible(&proc_poll_wait);
    1954                 :            : 
                                                /* Common exit: close the file opened above for the lookup, then drop the name. */
    1955                 :            : out_dput:
    1956                 :         35 :         filp_close(victim, NULL);
    1957                 :            : out:
    1958                 :         36 :         putname(pathname);
    1959                 :            :         return err;
    1960                 :            : }
    1961                 :            : 
    1962                 :            : #ifdef CONFIG_PROC_FS
                                        /*
                                         * poll handler for the "swaps" proc file.
                                         *
                                         * Registers the caller on proc_poll_wait.  If proc_poll_event differs from
                                         * the value cached in seq->poll_event, the cache is refreshed and POLLERR |
                                         * POLLPRI are reported on top of POLLIN | POLLRDNORM, which are always set.
                                         */
    1963                 :          0 : static unsigned swaps_poll(struct file *file, poll_table *wait)
    1964                 :            : {
    1965                 :          0 :         struct seq_file *seq = file->private_data;
    1966                 :            : 
    1967                 :            :         poll_wait(file, &proc_poll_wait, wait);
    1968                 :            : 
    1969         [ #  # ]:          0 :         if (seq->poll_event != atomic_read(&proc_poll_event)) {
    1970                 :          0 :                 seq->poll_event = atomic_read(&proc_poll_event);
    1971                 :          0 :                 return POLLIN | POLLRDNORM | POLLERR | POLLPRI;
    1972                 :            :         }
    1973                 :            : 
    1974                 :            :         return POLLIN | POLLRDNORM;
    1975                 :            : }
    1976                 :            : 
    1977                 :            : /* iterator */
                                        /*
                                         * seq_file ->start: take swapon_mutex (released in swap_stop()) and return
                                         * the element for position *pos.
                                         *
                                         * Position 0 yields SEQ_START_TOKEN, for which swap_show() prints the
                                         * header line.  Position l > 0 yields the l-th swap_info[] entry that has
                                         * SWP_USED set and a non-NULL swap_map, or NULL if there are fewer such
                                         * entries.  Every return path leaves swapon_mutex held.
                                         */
    1978                 :          0 : static void *swap_start(struct seq_file *swap, loff_t *pos)
    1979                 :            : {
    1980                 :            :         struct swap_info_struct *si;
    1981                 :            :         int type;
    1982                 :        107 :         loff_t l = *pos;
    1983                 :            : 
    1984                 :        107 :         mutex_lock(&swapon_mutex);
    1985                 :            : 
    1986         [ +  + ]:        107 :         if (!l)
    1987                 :            :                 return SEQ_START_TOKEN;
    1988                 :            : 
    1989         [ +  + ]:       1178 :         for (type = 0; type < nr_swapfiles; type++) {
    1990                 :       1140 :                 smp_rmb();      /* read nr_swapfiles before swap_info[type] */
    1991                 :       1140 :                 si = swap_info[type];
    1992 [ -  + ][ #  # ]:       1140 :                 if (!(si->flags & SWP_USED) || !si->swap_map)
    1993                 :       1140 :                         continue;
                                                        /* l counts down over the entries that pass the filter above. */
    1994         [ #  # ]:          0 :                 if (!--l)
    1995                 :            :                         return si;
    1996                 :            :         }
    1997                 :            : 
    1998                 :            :         return NULL;
    1999                 :            : }
    2000                 :            : 
                                        /*
                                         * seq_file ->next: return the entry that follows @v.
                                         *
                                         * The scan starts at slot 0 when @v is SEQ_START_TOKEN, otherwise at the
                                         * slot after @v's type.  The first slot with SWP_USED set and a non-NULL
                                         * swap_map is returned and *pos is incremented; when no such slot is left
                                         * NULL is returned and *pos is not touched.
                                         */
    2001                 :          0 : static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
    2002                 :            : {
    2003                 :            :         struct swap_info_struct *si = v;
    2004                 :            :         int type;
    2005                 :            : 
    2006         [ +  + ]:        534 :         if (v == SEQ_START_TOKEN)
    2007                 :            :                 type = 0;
    2008                 :            :         else
    2009                 :        534 :                 type = si->type + 1;
    2010                 :            : 
    2011         [ +  + ]:       2111 :         for (; type < nr_swapfiles; type++) {
    2012                 :       2042 :                 smp_rmb();      /* read nr_swapfiles before swap_info[type] */
    2013                 :       2042 :                 si = swap_info[type];
    2014 [ +  + ][ -  + ]:       2042 :                 if (!(si->flags & SWP_USED) || !si->swap_map)
    2015                 :       1577 :                         continue;
    2016                 :        465 :                 ++*pos;
    2017                 :        465 :                 return si;
    2018                 :            :         }
    2019                 :            : 
    2020                 :            :         return NULL;
    2021                 :            : }
    2022                 :            : 
                                        /* seq_file ->stop: release the swapon_mutex taken in swap_start(). */
    2023                 :          0 : static void swap_stop(struct seq_file *swap, void *v)
    2024                 :            : {
    2025                 :        107 :         mutex_unlock(&swapon_mutex);
    2026                 :        107 : }
    2027                 :            : 
                                        /*
                                         * seq_file ->show: emit one line of the swaps listing.
                                         *
                                         * SEQ_START_TOKEN produces the column header.  Any other @v is a
                                         * swap_info_struct and produces: the path of its swap_file, padding, the
                                         * type ("partition" for a block device, "file" otherwise), p->pages and
                                         * p->inuse_pages each shifted left by (PAGE_SHIFT - 10), i.e. scaled from
                                         * pages to units of 1024 bytes, and the priority.  Always returns 0.
                                         */
    2028                 :          0 : static int swap_show(struct seq_file *swap, void *v)
    2029                 :            : {
    2030                 :            :         struct swap_info_struct *si = v;
    2031                 :        465 :         struct file *file;
    2032                 :            :         int len;
    2033                 :            : 
    2034         [ +  + ]:        534 :         if (si == SEQ_START_TOKEN) {
    2035                 :         69 :                 seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
    2036                 :         69 :                 return 0;
    2037                 :            :         }
    2038                 :            : 
    2039                 :        465 :         file = si->swap_file;
                                                /* len is used below as the width the printed path occupied. */
    2040                 :        465 :         len = seq_path(swap, &file->f_path, " \t\n\\");
                                                /* Pad with spaces up to column 40, but always emit at least one. */
    2041 [ -  + ][ +  - ]:        465 :         seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",
    2042                 :            :                         len < 40 ? 40 - len : 1, " ",
    2043                 :        465 :                         S_ISBLK(file_inode(file)->i_mode) ?
    2044                 :            :                                 "partition" : "file\t",
    2045                 :          0 :                         si->pages << (PAGE_SHIFT - 10),
    2046                 :          0 :                         si->inuse_pages << (PAGE_SHIFT - 10),
    2047                 :          0 :                         si->prio);
    2048                 :        465 :         return 0;
    2049                 :            : }
    2050                 :            : 
                                        /* seq_file iterator callbacks used by swaps_open(). */
    2051                 :            : static const struct seq_operations swaps_op = {
    2052                 :            :         .start =        swap_start,
    2053                 :            :         .next =         swap_next,
    2054                 :            :         .stop =         swap_stop,
    2055                 :            :         .show =         swap_show
    2056                 :            : };
    2057                 :            : 
                                        /*
                                         * ->open for the "swaps" proc file: set up the seq_file with swaps_op and
                                         * record the current proc_poll_event in seq->poll_event, the value that
                                         * swaps_poll() later compares against.
                                         *
                                         * Returns 0, or the seq_open() error.
                                         */
    2058                 :          0 : static int swaps_open(struct inode *inode, struct file *file)
    2059                 :            : {
    2060                 :            :         struct seq_file *seq;
    2061                 :            :         int ret;
    2062                 :            : 
    2063                 :         69 :         ret = seq_open(file, &swaps_op);
    2064         [ +  - ]:         69 :         if (ret)
    2065                 :            :                 return ret;
    2066                 :            : 
                                                /* After a successful seq_open(), private_data is read as the seq_file. */
    2067                 :         69 :         seq = file->private_data;
    2068                 :         69 :         seq->poll_event = atomic_read(&proc_poll_event);
    2069                 :         69 :         return 0;
    2070                 :            : }
    2071                 :            : 
                                        /*
                                         * File operations for the "swaps" proc entry: the generic seq_file
                                         * read/llseek/release helpers plus the local open and poll handlers.
                                         */
    2072                 :            : static const struct file_operations proc_swaps_operations = {
    2073                 :            :         .open           = swaps_open,
    2074                 :            :         .read           = seq_read,
    2075                 :            :         .llseek         = seq_lseek,
    2076                 :            :         .release        = seq_release,
    2077                 :            :         .poll           = swaps_poll,
    2078                 :            : };
    2079                 :            : 
                                        /*
                                         * Initcall: create the proc entry "swaps" backed by proc_swaps_operations.
                                         * The proc_create() result is not checked; 0 is returned unconditionally.
                                         */
    2080                 :          0 : static int __init procswaps_init(void)
    2081                 :            : {
    2082                 :            :         proc_create("swaps", 0, NULL, &proc_swaps_operations);
    2083                 :          0 :         return 0;
    2084                 :            : }
    2085                 :            : __initcall(procswaps_init);
    2086                 :            : #endif /* CONFIG_PROC_FS */
    2087                 :            : 
    2088                 :            : #ifdef MAX_SWAPFILES_CHECK
                                        /*
                                         * Only built when MAX_SWAPFILES_CHECK is defined: expands that macro once
                                         * from a late_initcall and returns 0.  What the macro verifies is defined
                                         * elsewhere - presumably a sanity check on MAX_SWAPFILES; confirm at its
                                         * definition.
                                         */
    2089                 :          0 : static int __init max_swapfiles_check(void)
    2090                 :            : {
    2091                 :            :         MAX_SWAPFILES_CHECK();
    2092                 :          0 :         return 0;
    2093                 :            : }
    2094                 :            : late_initcall(max_swapfiles_check);
    2095                 :            : #endif
    2096                 :            : 
                                        /*
                                         * Obtain a swap_info_struct for a new swap area.
                                         *
                                         * A zeroed structure is allocated before swap_lock is taken.  Under the
                                         * lock the first swap_info[] slot without SWP_USED is looked for:
                                         *  - if the index found is >= MAX_SWAPFILES, the allocation is freed and
                                         *    ERR_PTR(-EPERM) is returned;
                                         *  - if no existing slot is free, the new structure is stored in
                                         *    swap_info[nr_swapfiles] and nr_swapfiles is incremented;
                                         *  - otherwise the new allocation is freed and the old structure in the
                                         *    free slot is reused as it is (not cleared).
                                         *
                                         * In both success cases the extent list head is re-initialised, flags is
                                         * set to SWP_USED and next to -1.
                                         *
                                         * Returns the structure, ERR_PTR(-ENOMEM) if the allocation fails, or
                                         * ERR_PTR(-EPERM) if all MAX_SWAPFILES slots are in use.
                                         */
    2097                 :          0 : static struct swap_info_struct *alloc_swap_info(void)
    2098                 :            : {
    2099                 :            :         struct swap_info_struct *p;
    2100                 :            :         unsigned int type;
    2101                 :            : 
    2102                 :            :         p = kzalloc(sizeof(*p), GFP_KERNEL);
    2103         [ +  - ]:         38 :         if (!p)
    2104                 :            :                 return ERR_PTR(-ENOMEM);
    2105                 :            : 
    2106                 :            :         spin_lock(&swap_lock);
                                                /* type == nr_swapfiles after the loop means no existing slot is free. */
    2107         [ +  + ]:        542 :         for (type = 0; type < nr_swapfiles; type++) {
    2108         [ +  + ]:        473 :                 if (!(swap_info[type]->flags & SWP_USED))
    2109                 :            :                         break;
    2110                 :            :         }
    2111         [ +  + ]:         38 :         if (type >= MAX_SWAPFILES) {
    2112                 :            :                 spin_unlock(&swap_lock);
    2113                 :          1 :                 kfree(p);
    2114                 :          1 :                 return ERR_PTR(-EPERM);
    2115                 :            :         }
    2116         [ +  + ]:         37 :         if (type >= nr_swapfiles) {
    2117                 :         30 :                 p->type = type;
    2118                 :         30 :                 swap_info[type] = p;
    2119                 :            :                 /*
    2120                 :            :                  * Write swap_info[type] before nr_swapfiles, in case a
    2121                 :            :                  * racing procfs swap_start() or swap_next() is reading them.
    2122                 :            :                  * (We never shrink nr_swapfiles, we never free this entry.)
    2123                 :            :                  */
    2124                 :         30 :                 smp_wmb();
    2125                 :         30 :                 nr_swapfiles++;
    2126                 :            :         } else {
    2127                 :          7 :                 kfree(p);
    2128                 :          7 :                 p = swap_info[type];
    2129                 :            :                 /*
    2130                 :            :                  * Do not memset this entry: a racing procfs swap_next()
    2131                 :            :                  * would be relying on p->type to remain valid.
    2132                 :            :                  */
    2133                 :            :         }
    2134                 :         37 :         INIT_LIST_HEAD(&p->first_swap_extent.list);
    2135                 :         37 :         p->flags = SWP_USED;
    2136                 :         37 :         p->next = -1;
    2137                 :            :         spin_unlock(&swap_lock);
                                                /*
                                                 * NOTE(review): p->lock is initialised after swap_lock is dropped,
                                                 * and this also runs for a reused entry - confirm that nothing can
                                                 * be holding or taking p->lock at this point.
                                                 */
    2138                 :         37 :         spin_lock_init(&p->lock);
    2139                 :            : 
    2140                 :         37 :         return p;
    2141                 :            : }
    2142                 :            : 
    2143                 :          0 : static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
    2144                 :            : {
    2145                 :            :         int error;
    2146                 :            : 
    2147         [ -  + ]:         35 :         if (S_ISBLK(inode->i_mode)) {
    2148                 :          0 :                 p->bdev = bdgrab(I_BDEV(inode));
    2149                 :          0 :                 error = blkdev_get(p->bdev,
    2150                 :            :                                    FMODE_READ | FMODE_WRITE | FMODE_EXCL,
    2151                 :            :                                    sys_swapon);
    2152         [ #  # ]:          0 :                 if (error < 0) {
    2153                 :          0 :                         p->bdev = NULL;
    2154                 :          0 :                         return -EINVAL;
    2155                 :            :                 }
    2156                 :          0 :                 p->old_block_size = block_size(p->bdev);
    2157                 :          0 :                 error = set_blocksize(p->bdev, PAGE_SIZE);
    2158         [ #  # ]:          0 :                 if (error < 0)
    2159                 :            :                         return error;
    2160                 :          0 :                 p->flags |= SWP_BLKDEV;
    2161         [ +  - ]:         35 :         } else if (S_ISREG(inode->i_mode)) {
    2162                 :         35 :                 p->bdev = inode->i_sb->s_bdev;
    2163                 :         35 :                 mutex_lock(&inode->i_mutex);
    2164         [ +  - ]:         35 :                 if (IS_SWAPFILE(inode))
    2165                 :            :                         return -EBUSY;
    2166                 :            :         } else
    2167                 :            :                 return -EINVAL;
    2168                 :            : 
    2169                 :            :         return 0;
    2170                 :            : }
    2171                 :            : 
    2172                 :          0 : static unsigned long read_swap_header(struct swap_info_struct *p,
    2173                 :            :                                         union swap_header *swap_header,
    2174                 :            :                                         struct inode *inode)
    2175                 :            : {
    2176                 :            :         int i;
    2177                 :            :         unsigned long maxpages;
    2178                 :            :         unsigned long swapfilepages;
    2179                 :            :         unsigned long last_page;
    2180                 :            : 
    2181         [ +  + ]:         35 :         if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
    2182                 :          1 :                 pr_err("Unable to find swap-space signature\n");
    2183                 :          1 :                 return 0;
    2184                 :            :         }
    2185                 :            : 
    2186                 :            :         /* swap partition endianess hack... */
    2187 [ -  + ][ -  + ]:         34 :         if (swab32(swap_header->info.version) == 1) {
    2188                 :            :                 swab32s(&swap_header->info.version);
    2189                 :            :                 swab32s(&swap_header->info.last_page);
    2190                 :            :                 swab32s(&swap_header->info.nr_badpages);
    2191         [ #  # ]:          0 :                 for (i = 0; i < swap_header->info.nr_badpages; i++)
    2192                 :            :                         swab32s(&swap_header->info.badpages[i]);
    2193                 :            :         }
    2194                 :            :         /* Check the swap header's sub-version */
    2195         [ -  + ]:         34 :         if (swap_header->info.version != 1) {
    2196                 :          0 :                 pr_warn("Unable to handle swap header version %d\n",
    2197                 :            :                         swap_header->info.version);
    2198                 :          0 :                 return 0;
    2199                 :            :         }
    2200                 :            : 
    2201                 :         34 :         p->lowest_bit  = 1;
    2202                 :         34 :         p->cluster_next = 1;
    2203                 :         34 :         p->cluster_nr = 0;
    2204                 :            : 
    2205                 :            :         /*
    2206                 :            :          * Find out how many pages are allowed for a single swap
    2207                 :            :          * device. There are two limiting factors: 1) the number
    2208                 :            :          * of bits for the swap offset in the swp_entry_t type, and
    2209                 :            :          * 2) the number of bits in the swap pte as defined by the
    2210                 :            :          * different architectures. In order to find the
    2211                 :            :          * largest possible bit mask, a swap entry with swap type 0
    2212                 :            :          * and swap offset ~0UL is created, encoded to a swap pte,
    2213                 :            :          * decoded to a swp_entry_t again, and finally the swap
    2214                 :            :          * offset is extracted. This will mask all the bits from
    2215                 :            :          * the initial ~0UL mask that can't be encoded in either
    2216                 :            :          * the swp_entry_t or the architecture definition of a
    2217                 :            :          * swap pte.
    2218                 :            :          */
    2219                 :            :         maxpages = swp_offset(pte_to_swp_entry(
    2220                 :            :                         swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
    2221                 :         34 :         last_page = swap_header->info.last_page;
    2222         [ -  + ]:         34 :         if (last_page > maxpages) {
    2223                 :          0 :                 pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
    2224                 :            :                         maxpages << (PAGE_SHIFT - 10),
    2225                 :            :                         last_page << (PAGE_SHIFT - 10));
    2226                 :            :         }
    2227         [ +  - ]:         69 :         if (maxpages > last_page) {
    2228                 :         34 :                 maxpages = last_page + 1;
    2229                 :            :                 /* p->max is an unsigned int: don't overflow it */
    2230         [ -  + ]:         34 :                 if ((unsigned int)maxpages == 0)
    2231                 :            :                         maxpages = UINT_MAX;
    2232                 :            :         }
    2233                 :         34 :         p->highest_bit = maxpages - 1;
    2234                 :            : 
    2235         [ +  - ]:         34 :         if (!maxpages)
    2236                 :            :                 return 0;
    2237                 :         34 :         swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
    2238         [ -  + ]:         34 :         if (swapfilepages && maxpages > swapfilepages) {
    2239                 :          0 :                 pr_warn("Swap area shorter than signature indicates\n");
    2240                 :          0 :                 return 0;
    2241                 :            :         }
    2242 [ -  + ][ #  # ]:         34 :         if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
    2243                 :            :                 return 0;
    2244         [ +  - ]:         34 :         if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
    2245                 :            :                 return 0;
    2246                 :            : 
    2247                 :         34 :         return maxpages;
    2248                 :            : }
    2249                 :            : 
    2250                 :          0 : static int setup_swap_map_and_extents(struct swap_info_struct *p,
    2251                 :            :                                         union swap_header *swap_header,
    2252                 :            :                                         unsigned char *swap_map,
    2253                 :            :                                         struct swap_cluster_info *cluster_info,
    2254                 :            :                                         unsigned long maxpages,
    2255                 :            :                                         sector_t *span)
    2256                 :            : {
    2257                 :            :         int i;
    2258                 :            :         unsigned int nr_good_pages;
    2259                 :            :         int nr_extents;
    2260                 :         34 :         unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
    2261                 :         34 :         unsigned long idx = p->cluster_next / SWAPFILE_CLUSTER;
    2262                 :            : 
    2263                 :         34 :         nr_good_pages = maxpages - 1;   /* omit header page */
    2264                 :            : 
    2265                 :            :         cluster_set_null(&p->free_cluster_head);
    2266                 :            :         cluster_set_null(&p->free_cluster_tail);
    2267                 :            :         cluster_set_null(&p->discard_cluster_head);
    2268                 :            :         cluster_set_null(&p->discard_cluster_tail);
    2269                 :            : 
    2270         [ -  + ]:         34 :         for (i = 0; i < swap_header->info.nr_badpages; i++) {
    2271                 :          0 :                 unsigned int page_nr = swap_header->info.badpages[i];
    2272 [ #  # ][ #  # ]:          0 :                 if (page_nr == 0 || page_nr > swap_header->info.last_page)
    2273                 :            :                         return -EINVAL;
    2274         [ #  # ]:          0 :                 if (page_nr < maxpages) {
    2275                 :          0 :                         swap_map[page_nr] = SWAP_MAP_BAD;
    2276                 :          0 :                         nr_good_pages--;
    2277                 :            :                         /*
    2278                 :            :                          * Haven't marked the cluster free yet, no list
    2279                 :            :                          * operation involved
    2280                 :            :                          */
    2281                 :          0 :                         inc_cluster_info_page(p, cluster_info, page_nr);
    2282                 :            :                 }
    2283                 :            :         }
    2284                 :            : 
    2285                 :            :         /* Haven't marked the cluster free yet, no list operation involved */
    2286         [ +  + ]:       7906 :         for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++)
    2287                 :       7872 :                 inc_cluster_info_page(p, cluster_info, i);
    2288                 :            : 
    2289         [ +  - ]:         34 :         if (nr_good_pages) {
    2290                 :         34 :                 swap_map[0] = SWAP_MAP_BAD;
    2291                 :            :                 /*
    2292                 :            :                  * Not mark the cluster free yet, no list
    2293                 :            :                  * operation involved
    2294                 :            :                  */
    2295                 :         34 :                 inc_cluster_info_page(p, cluster_info, 0);
    2296                 :         34 :                 p->max = maxpages;
    2297                 :         34 :                 p->pages = nr_good_pages;
    2298                 :         34 :                 nr_extents = setup_swap_extents(p, span);
    2299         [ +  - ]:         34 :                 if (nr_extents < 0)
    2300                 :            :                         return nr_extents;
    2301                 :         34 :                 nr_good_pages = p->pages;
    2302                 :            :         }
    2303         [ -  + ]:         34 :         if (!nr_good_pages) {
    2304                 :          0 :                 pr_warn("Empty swap-file\n");
    2305                 :          0 :                 return -EINVAL;
    2306                 :            :         }
    2307                 :            : 
    2308         [ -  + ]:         34 :         if (!cluster_info)
    2309                 :            :                 return nr_extents;
    2310                 :            : 
    2311            [ - ]:          0 :         for (i = 0; i < nr_clusters; i++) {
    2312         [ #  # ]:          0 :                 if (!cluster_count(&cluster_info[idx])) {
    2313                 :            :                         cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
    2314         [ #  # ]:          0 :                         if (cluster_is_null(&p->free_cluster_head)) {
    2315                 :            :                                 cluster_set_next_flag(&p->free_cluster_head,
    2316                 :            :                                                                 idx, 0);
    2317                 :            :                                 cluster_set_next_flag(&p->free_cluster_tail,
    2318                 :            :                                                                 idx, 0);
    2319                 :            :                         } else {
    2320                 :            :                                 unsigned int tail;
    2321                 :            : 
    2322                 :            :                                 tail = cluster_next(&p->free_cluster_tail);
    2323                 :          0 :                                 cluster_set_next(&cluster_info[tail], idx);
    2324                 :            :                                 cluster_set_next_flag(&p->free_cluster_tail,
    2325                 :            :                                                                 idx, 0);
    2326                 :            :                         }
    2327                 :            :                 }
    2328                 :          0 :                 idx++;
    2329         [ #  # ]:          0 :                 if (idx == nr_clusters)
    2330                 :            :                         idx = 0;
    2331                 :            :         }
    2332                 :            :         return nr_extents;
    2333                 :            : }
    2334                 :            : 
    2335                 :            : /*
    2336                 :            :  * Helper to sys_swapon determining if a given swap
    2337                 :            :  * backing device queue supports DISCARD operations.
    2338                 :            :  */
    2339                 :          0 : static bool swap_discardable(struct swap_info_struct *si)
    2340                 :            : {
    2341                 :          0 :         struct request_queue *q = bdev_get_queue(si->bdev);
    2342                 :            : 
    2343 [ #  # ][ #  # ]:          0 :         if (!q || !blk_queue_discard(q))
    2344                 :            :                 return false;
    2345                 :            : 
    2346                 :            :         return true;
    2347                 :            : }
    2348                 :            : 
    2349                 :          0 : SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
    2350                 :            : {
    2351                 :            :         struct swap_info_struct *p;
    2352                 :            :         struct filename *name;
    2353                 :            :         struct file *swap_file = NULL;
    2354                 :            :         struct address_space *mapping;
    2355                 :            :         int i;
    2356                 :            :         int prio;
    2357                 :            :         int error;
    2358                 :            :         union swap_header *swap_header;
    2359                 :            :         int nr_extents;
    2360                 :            :         sector_t span;
    2361                 :            :         unsigned long maxpages;
    2362                 :            :         unsigned char *swap_map = NULL;
    2363                 :            :         struct swap_cluster_info *cluster_info = NULL;
    2364                 :            :         unsigned long *frontswap_map = NULL;
    2365                 :            :         struct page *page = NULL;
    2366                 :            :         struct inode *inode = NULL;
    2367                 :            : 
    2368         [ +  - ]:         39 :         if (swap_flags & ~SWAP_FLAGS_VALID)
    2369                 :            :                 return -EINVAL;
    2370                 :            : 
    2371         [ +  + ]:         39 :         if (!capable(CAP_SYS_ADMIN))
    2372                 :            :                 return -EPERM;
    2373                 :            : 
    2374                 :         38 :         p = alloc_swap_info();
    2375         [ +  + ]:         38 :         if (IS_ERR(p))
    2376                 :            :                 return PTR_ERR(p);
    2377                 :            : 
    2378                 :         74 :         INIT_WORK(&p->discard_work, swap_discard_work);
    2379                 :            : 
    2380                 :         37 :         name = getname(specialfile);
    2381         [ -  + ]:         37 :         if (IS_ERR(name)) {
    2382                 :            :                 error = PTR_ERR(name);
    2383                 :            :                 name = NULL;
    2384                 :            :                 goto bad_swap;
    2385                 :            :         }
    2386                 :         37 :         swap_file = file_open_name(name, O_RDWR|O_LARGEFILE, 0);
    2387         [ +  + ]:         37 :         if (IS_ERR(swap_file)) {
    2388                 :            :                 error = PTR_ERR(swap_file);
    2389                 :            :                 swap_file = NULL;
    2390                 :            :                 goto bad_swap;
    2391                 :            :         }
    2392                 :            : 
    2393                 :         36 :         p->swap_file = swap_file;
    2394                 :         36 :         mapping = swap_file->f_mapping;
    2395                 :            : 
    2396         [ +  + ]:        507 :         for (i = 0; i < nr_swapfiles; i++) {
    2397                 :        472 :                 struct swap_info_struct *q = swap_info[i];
    2398                 :            : 
    2399 [ +  + ][ +  + ]:        472 :                 if (q == p || !q->swap_file)
    2400                 :         36 :                         continue;
    2401         [ +  + ]:        436 :                 if (mapping == q->swap_file->f_mapping) {
    2402                 :            :                         error = -EBUSY;
    2403                 :            :                         goto bad_swap;
    2404                 :            :                 }
    2405                 :            :         }
    2406                 :            : 
    2407                 :         35 :         inode = mapping->host;
    2408                 :            :         /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
    2409                 :         35 :         error = claim_swapfile(p, inode);
    2410         [ +  - ]:         35 :         if (unlikely(error))
    2411                 :            :                 goto bad_swap;
    2412                 :            : 
    2413                 :            :         /*
    2414                 :            :          * Read the swap header.
    2415                 :            :          */
    2416         [ +  - ]:         35 :         if (!mapping->a_ops->readpage) {
    2417                 :            :                 error = -EINVAL;
    2418                 :            :                 goto bad_swap;
    2419                 :            :         }
    2420                 :            :         page = read_mapping_page(mapping, 0, swap_file);
    2421         [ -  + ]:         35 :         if (IS_ERR(page)) {
    2422                 :            :                 error = PTR_ERR(page);
    2423                 :            :                 goto bad_swap;
    2424                 :            :         }
    2425                 :         35 :         swap_header = kmap(page);
    2426                 :            : 
    2427                 :         35 :         maxpages = read_swap_header(p, swap_header, inode);
    2428         [ +  + ]:         35 :         if (unlikely(!maxpages)) {
    2429                 :            :                 error = -EINVAL;
    2430                 :            :                 goto bad_swap;
    2431                 :            :         }
    2432                 :            : 
    2433                 :            :         /* OK, set up the swap map and apply the bad block list */
    2434                 :         34 :         swap_map = vzalloc(maxpages);
    2435         [ +  - ]:         34 :         if (!swap_map) {
    2436                 :            :                 error = -ENOMEM;
    2437                 :            :                 goto bad_swap;
    2438                 :            :         }
    2439 [ +  - ][ -  + ]:         34 :         if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
    2440                 :          0 :                 p->flags |= SWP_SOLIDSTATE;
    2441                 :            :                 /*
    2442                 :            :                  * select a random position to start with to help wear leveling
    2443                 :            :                  * SSD
    2444                 :            :                  */
    2445                 :          0 :                 p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
    2446                 :            : 
    2447                 :          0 :                 cluster_info = vzalloc(DIV_ROUND_UP(maxpages,
    2448                 :            :                         SWAPFILE_CLUSTER) * sizeof(*cluster_info));
    2449         [ #  # ]:          0 :                 if (!cluster_info) {
    2450                 :            :                         error = -ENOMEM;
    2451                 :            :                         goto bad_swap;
    2452                 :            :                 }
    2453                 :          0 :                 p->percpu_cluster = alloc_percpu(struct percpu_cluster);
    2454         [ #  # ]:          0 :                 if (!p->percpu_cluster) {
    2455                 :            :                         error = -ENOMEM;
    2456                 :            :                         goto bad_swap;
    2457                 :            :                 }
    2458         [ #  # ]:          0 :                 for_each_possible_cpu(i) {
    2459                 :            :                         struct percpu_cluster *cluster;
    2460                 :          0 :                         cluster = per_cpu_ptr(p->percpu_cluster, i);
    2461                 :            :                         cluster_set_null(&cluster->index);
    2462                 :            :                 }
    2463                 :            :         }
    2464                 :            : 
    2465                 :            :         error = swap_cgroup_swapon(p->type, maxpages);
    2466                 :            :         if (error)
    2467                 :            :                 goto bad_swap;
    2468                 :            : 
    2469                 :         34 :         nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
    2470                 :            :                 cluster_info, maxpages, &span);
    2471         [ +  - ]:         34 :         if (unlikely(nr_extents < 0)) {
    2472                 :            :                 error = nr_extents;
    2473                 :            :                 goto bad_swap;
    2474                 :            :         }
    2475                 :            :         /* frontswap enabled? set up bit-per-page map for frontswap */
    2476                 :            :         if (frontswap_enabled)
    2477                 :            :                 frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long));
    2478                 :            : 
    2479 [ +  - ][ -  + ]:         34 :         if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
                 [ #  # ]
    2480                 :            :                 /*
    2481                 :            :                  * When discard is enabled for swap with no particular
    2482                 :            :                  * policy flagged, we set all swap discard flags here in
    2483                 :            :                  * order to sustain backward compatibility with older
    2484                 :            :                  * swapon(8) releases.
    2485                 :            :                  */
    2486                 :          0 :                 p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD |
    2487                 :            :                              SWP_PAGE_DISCARD);
    2488                 :            : 
    2489                 :            :                 /*
    2490                 :            :                  * By flagging sys_swapon, a sysadmin can tell us to
    2491                 :            :                  * either do single-time area discards only, or to just
    2492                 :            :                  * perform discards for released swap page-clusters.
    2493                 :            :                  * Now it's time to adjust the p->flags accordingly.
    2494                 :            :                  */
    2495         [ #  # ]:          0 :                 if (swap_flags & SWAP_FLAG_DISCARD_ONCE)
    2496                 :          0 :                         p->flags &= ~SWP_PAGE_DISCARD;
    2497         [ #  # ]:          0 :                 else if (swap_flags & SWAP_FLAG_DISCARD_PAGES)
    2498                 :          0 :                         p->flags &= ~SWP_AREA_DISCARD;
    2499                 :            : 
    2500                 :            :                 /* issue a swapon-time discard if it's still required */
    2501         [ #  # ]:          0 :                 if (p->flags & SWP_AREA_DISCARD) {
    2502                 :          0 :                         int err = discard_swap(p);
    2503         [ #  # ]:          0 :                         if (unlikely(err))
    2504                 :          0 :                                 pr_err("swapon: discard_swap(%p): %d\n",
    2505                 :            :                                         p, err);
    2506                 :            :                 }
    2507                 :            :         }
    2508                 :            : 
    2509                 :         34 :         mutex_lock(&swapon_mutex);
    2510                 :            :         prio = -1;
    2511         [ -  + ]:         34 :         if (swap_flags & SWAP_FLAG_PREFER)
    2512                 :          0 :                 prio =
    2513                 :            :                   (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
    2514                 :         34 :         enable_swap_info(p, prio, swap_map, cluster_info, frontswap_map);
    2515                 :            : 
    2516 [ +  - ][ +  - ]:         34 :         pr_info("Adding %uk swap on %s.  "
         [ +  - ][ +  - ]
    2517                 :            :                         "Priority:%d extents:%d across:%lluk %s%s%s%s%s\n",
    2518                 :            :                 p->pages<<(PAGE_SHIFT-10), name->name, p->prio,
    2519                 :            :                 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
    2520                 :            :                 (p->flags & SWP_SOLIDSTATE) ? "SS" : "",
    2521                 :            :                 (p->flags & SWP_DISCARDABLE) ? "D" : "",
    2522                 :            :                 (p->flags & SWP_AREA_DISCARD) ? "s" : "",
    2523                 :            :                 (p->flags & SWP_PAGE_DISCARD) ? "c" : "",
    2524                 :            :                 (frontswap_map) ? "FS" : "");
    2525                 :            : 
    2526                 :         34 :         mutex_unlock(&swapon_mutex);
    2527                 :            :         atomic_inc(&proc_poll_event);
    2528                 :         34 :         wake_up_interruptible(&proc_poll_wait);
    2529                 :            : 
    2530         [ +  - ]:         34 :         if (S_ISREG(inode->i_mode))
    2531                 :         34 :                 inode->i_flags |= S_SWAPFILE;
    2532                 :            :         error = 0;
    2533                 :            :         goto out;
    2534                 :            : bad_swap:
    2535                 :          3 :         free_percpu(p->percpu_cluster);
    2536                 :          3 :         p->percpu_cluster = NULL;
    2537 [ +  + ][ -  + ]:          3 :         if (inode && S_ISBLK(inode->i_mode) && p->bdev) {
                 [ #  # ]
    2538                 :          0 :                 set_blocksize(p->bdev, p->old_block_size);
    2539                 :          0 :                 blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
    2540                 :            :         }
    2541                 :          3 :         destroy_swap_extents(p);
    2542                 :            :         swap_cgroup_swapoff(p->type);
    2543                 :            :         spin_lock(&swap_lock);
    2544                 :          3 :         p->swap_file = NULL;
    2545                 :          3 :         p->flags = 0;
    2546                 :            :         spin_unlock(&swap_lock);
    2547                 :          3 :         vfree(swap_map);
    2548                 :          3 :         vfree(cluster_info);
    2549         [ +  + ]:          3 :         if (swap_file) {
    2550 [ +  + ][ +  - ]:          2 :                 if (inode && S_ISREG(inode->i_mode)) {
    2551                 :          1 :                         mutex_unlock(&inode->i_mutex);
    2552                 :            :                         inode = NULL;
    2553                 :            :                 }
    2554                 :          2 :                 filp_close(swap_file, NULL);
    2555                 :            :         }
    2556                 :            : out:
    2557 [ +  + ][ +  - ]:         37 :         if (page && !IS_ERR(page)) {
    2558                 :         35 :                 kunmap(page);
    2559                 :         35 :                 page_cache_release(page);
    2560                 :            :         }
    2561         [ +  - ]:         37 :         if (name)
    2562                 :         37 :                 putname(name);
    2563 [ +  + ][ +  - ]:         37 :         if (inode && S_ISREG(inode->i_mode))
    2564                 :         34 :                 mutex_unlock(&inode->i_mutex);
    2565                 :            :         return error;
    2566                 :            : }
    2567                 :            : 
    2568                 :          0 : void si_swapinfo(struct sysinfo *val)
    2569                 :            : {
    2570                 :            :         unsigned int type;
    2571                 :            :         unsigned long nr_to_be_unused = 0;
    2572                 :            : 
    2573                 :            :         spin_lock(&swap_lock);
    2574         [ +  + ]:     103246 :         for (type = 0; type < nr_swapfiles; type++) {
    2575                 :      83100 :                 struct swap_info_struct *si = swap_info[type];
    2576                 :            : 
    2577         [ -  + ]:      83100 :                 if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK))
    2578                 :          0 :                         nr_to_be_unused += si->inuse_pages;
    2579                 :            :         }
    2580                 :      10073 :         val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused;
    2581                 :      10073 :         val->totalswap = total_swap_pages + nr_to_be_unused;
    2582                 :            :         spin_unlock(&swap_lock);
    2583                 :      10073 : }
    2584                 :            : 
    2585                 :            : /*
    2586                 :            :  * Verify that a swap entry is valid and increment its swap map count.
    2587                 :            :  *
    2588                 :            :  * Return values:
    2589                 :            :  * - success -> 0
    2590                 :            :  * - swp_entry is invalid -> -EINVAL
    2591                 :            :  * - swp_entry is migration entry -> -EINVAL
    2592                 :            :  * - swap-cache reference is requested but there is already one. -> -EEXIST
    2593                 :            :  * - swap-cache reference is requested but the entry is not used. -> -ENOENT
    2594                 :            :  * - swap-mapped reference requested but needs continued swap count. -> -ENOMEM
    2595                 :            :  */
    2596                 :          0 : static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
    2597                 :            : {
    2598                 :            :         struct swap_info_struct *p;
    2599                 :            :         unsigned long offset, type;
    2600                 :            :         unsigned char count;
    2601                 :            :         unsigned char has_cache;
    2602                 :            :         int err = -EINVAL;
    2603                 :            : 
    2604         [ #  # ]:          0 :         if (non_swap_entry(entry))
    2605                 :            :                 goto out;
    2606                 :            : 
    2607                 :            :         type = swp_type(entry);
    2608         [ #  # ]:          0 :         if (type >= nr_swapfiles)
    2609                 :            :                 goto bad_file;
    2610                 :          0 :         p = swap_info[type];
    2611                 :            :         offset = swp_offset(entry);
    2612                 :            : 
    2613                 :            :         spin_lock(&p->lock);
    2614         [ #  # ]:          0 :         if (unlikely(offset >= p->max))
    2615                 :            :                 goto unlock_out;
    2616                 :            : 
    2617                 :          0 :         count = p->swap_map[offset];
    2618                 :            : 
    2619                 :            :         /*
    2620                 :            :          * swapin_readahead() doesn't check if a swap entry is valid, so the
    2621                 :            :          * swap entry could be SWAP_MAP_BAD. Check here with lock held.
    2622                 :            :          */
    2623         [ #  # ]:          0 :         if (unlikely(swap_count(count) == SWAP_MAP_BAD)) {
    2624                 :            :                 err = -ENOENT;
    2625                 :            :                 goto unlock_out;
    2626                 :            :         }
    2627                 :            : 
    2628                 :          0 :         has_cache = count & SWAP_HAS_CACHE;
    2629                 :            :         count &= ~SWAP_HAS_CACHE;
    2630                 :            :         err = 0;
    2631                 :            : 
    2632         [ #  # ]:          0 :         if (usage == SWAP_HAS_CACHE) {
    2633                 :            : 
    2634                 :            :                 /* set SWAP_HAS_CACHE if there is no cache and entry is used */
    2635         [ #  # ]:          0 :                 if (!has_cache && count)
    2636                 :            :                         has_cache = SWAP_HAS_CACHE;
    2637         [ #  # ]:          0 :                 else if (has_cache)             /* someone else added cache */
    2638                 :            :                         err = -EEXIST;
    2639                 :            :                 else                            /* no users remaining */
    2640                 :            :                         err = -ENOENT;
    2641                 :            : 
    2642         [ #  # ]:          0 :         } else if (count || has_cache) {
    2643                 :            : 
    2644         [ #  # ]:          0 :                 if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
    2645                 :          0 :                         count += usage;
    2646         [ #  # ]:          0 :                 else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX)
    2647                 :            :                         err = -EINVAL;
    2648         [ #  # ]:          0 :                 else if (swap_count_continued(p, offset, count))
    2649                 :            :                         count = COUNT_CONTINUED;
    2650                 :            :                 else
    2651                 :            :                         err = -ENOMEM;
    2652                 :            :         } else
    2653                 :            :                 err = -ENOENT;                  /* unused swap entry */
    2654                 :            : 
    2655                 :          0 :         p->swap_map[offset] = count | has_cache;
    2656                 :            : 
    2657                 :            : unlock_out:
    2658                 :            :         spin_unlock(&p->lock);
    2659                 :            : out:
    2660                 :          0 :         return err;
    2661                 :            : 
    2662                 :            : bad_file:
    2663                 :          0 :         pr_err("swap_dup: %s%08lx\n", Bad_file, entry.val);
    2664                 :          0 :         goto out;
    2665                 :            : }
    2666                 :            : 
    2667                 :            : /*
    2668                 :            :  * Help swapoff by noting that swap entry belongs to shmem/tmpfs
    2669                 :            :  * (in which case its reference count is never incremented).
    2670                 :            :  */
    2671                 :          0 : void swap_shmem_alloc(swp_entry_t entry)
    2672                 :            : {
    2673                 :          0 :         __swap_duplicate(entry, SWAP_MAP_SHMEM);
    2674                 :          0 : }
    2675                 :            : 
    2676                 :            : /*
    2677                 :            :  * Increase reference count of swap entry by 1.
    2678                 :            :  * Returns 0 for success, or -ENOMEM if a swap_count_continuation is required
    2679                 :            :  * but could not be atomically allocated.  Returns 0, just as if it succeeded,
    2680                 :            :  * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which
    2681                 :            :  * might occur if a page table entry has got corrupted.
    2682                 :            :  */
    2683                 :          0 : int swap_duplicate(swp_entry_t entry)
    2684                 :            : {
    2685                 :            :         int err = 0;
    2686                 :            : 
    2687 [ #  # ][ #  # ]:          0 :         while (!err && __swap_duplicate(entry, 1) == -ENOMEM)
    2688                 :          0 :                 err = add_swap_count_continuation(entry, GFP_ATOMIC);
    2689                 :          0 :         return err;
    2690                 :            : }
    2691                 :            : 
    2692                 :            : /*
    2693                 :            :  * @entry: swap entry for which we allocate swap cache.
    2694                 :            :  *
    2695                 :            :  * Called when allocating swap cache for an existing swap entry.
    2696                 :            :  * This can return error codes. Returns 0 on success.
    2697                 :            :  * -EEXIST means there is a swap cache.
    2698                 :            :  * Note: return code is different from swap_duplicate().
    2699                 :            :  */
    2700                 :          0 : int swapcache_prepare(swp_entry_t entry)
    2701                 :            : {
    2702                 :          0 :         return __swap_duplicate(entry, SWAP_HAS_CACHE);
    2703                 :            : }
    2704                 :            : 
    2705                 :          0 : struct swap_info_struct *page_swap_info(struct page *page)
    2706                 :            : {
    2707                 :          0 :         swp_entry_t swap = { .val = page_private(page) };
    2708         [ #  # ]:          0 :         BUG_ON(!PageSwapCache(page));
    2709                 :          0 :         return swap_info[swp_type(swap)];
    2710                 :            : }
    2711                 :            : 
    2712                 :            : /*
    2713                 :            :  * out-of-line __page_file_ methods to avoid include hell.
    2714                 :            :  */
    2715                 :          0 : struct address_space *__page_file_mapping(struct page *page)
    2716                 :            : {
    2717                 :            :         VM_BUG_ON(!PageSwapCache(page));
    2718                 :          0 :         return page_swap_info(page)->swap_file->f_mapping;
    2719                 :            : }
    2720                 :            : EXPORT_SYMBOL_GPL(__page_file_mapping);
    2721                 :            : 
    2722                 :          0 : pgoff_t __page_file_index(struct page *page)
    2723                 :            : {
    2724                 :          0 :         swp_entry_t swap = { .val = page_private(page) };
    2725                 :            :         VM_BUG_ON(!PageSwapCache(page));
    2726                 :          0 :         return swp_offset(swap);
    2727                 :            : }
    2728                 :            : EXPORT_SYMBOL_GPL(__page_file_index);
    2729                 :            : 
    2730                 :            : /*
    2731                 :            :  * add_swap_count_continuation - called when a swap count is duplicated
    2732                 :            :  * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
    2733                 :            :  * page of the original vmalloc'ed swap_map, to hold the continuation count
    2734                 :            :  * (for that entry and for its neighbouring PAGE_SIZE swap entries).  Called
    2735                 :            :  * again when count is duplicated beyond SWAP_MAP_MAX * SWAP_CONT_MAX, etc.
    2736                 :            :  *
    2737                 :            :  * These continuation pages are seldom referenced: the common paths all work
    2738                 :            :  * on the original swap_map, only referring to a continuation page when the
    2739                 :            :  * low "digit" of a count is incremented or decremented through SWAP_MAP_MAX.
    2740                 :            :  *
    2741                 :            :  * add_swap_count_continuation(, GFP_ATOMIC) can be called while holding
    2742                 :            :  * page table locks; if it fails, add_swap_count_continuation(, GFP_KERNEL)
    2743                 :            :  * can be called after dropping locks.
    2744                 :            :  */
    2745                 :          0 : int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
    2746                 :            : {
    2747                 :            :         struct swap_info_struct *si;
    2748                 :            :         struct page *head;
    2749                 :            :         struct page *page;
    2750                 :            :         struct page *list_page;
    2751                 :            :         pgoff_t offset;
    2752                 :            :         unsigned char count;
    2753                 :            : 
    2754                 :            :         /*
    2755                 :            :          * When debugging, it's easier to use __GFP_ZERO here; but it's better
    2756                 :            :          * for latency not to zero a page while GFP_ATOMIC and holding locks.
    2757                 :            :          */
    2758                 :          0 :         page = alloc_page(gfp_mask | __GFP_HIGHMEM);
    2759                 :            : 
    2760                 :          0 :         si = swap_info_get(entry);
    2761         [ #  # ]:          0 :         if (!si) {
    2762                 :            :                 /*
    2763                 :            :                  * An acceptable race has occurred since the failing
    2764                 :            :                  * __swap_duplicate(): the swap entry has been freed,
    2765                 :            :                  * perhaps even the whole swap_map cleared for swapoff.
    2766                 :            :                  */
    2767                 :            :                 goto outer;
    2768                 :            :         }
    2769                 :            : 
    2770                 :            :         offset = swp_offset(entry);
    2771                 :          0 :         count = si->swap_map[offset] & ~SWAP_HAS_CACHE;
    2772                 :            : 
    2773         [ #  # ]:          0 :         if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) {
    2774                 :            :                 /*
    2775                 :            :                  * The higher the swap count, the more likely it is that tasks
    2776                 :            :                  * will race to add swap count continuation: we need to avoid
    2777                 :            :                  * over-provisioning.
    2778                 :            :                  */
    2779                 :            :                 goto out;
    2780                 :            :         }
    2781                 :            : 
    2782         [ #  # ]:          0 :         if (!page) {
    2783                 :            :                 spin_unlock(&si->lock);
    2784                 :          0 :                 return -ENOMEM;
    2785                 :            :         }
    2786                 :            : 
    2787                 :            :         /*
    2788                 :            :          * We are fortunate that although vmalloc_to_page uses pte_offset_map,
    2789                 :            :          * no architecture is using highmem pages for kernel page tables: so it
    2790                 :            :          * will not corrupt the GFP_ATOMIC caller's atomic page table kmaps.
    2791                 :            :          */
    2792                 :          0 :         head = vmalloc_to_page(si->swap_map + offset);
    2793                 :          0 :         offset &= ~PAGE_MASK;
    2794                 :            : 
    2795                 :            :         /*
    2796                 :            :          * Page allocation does not initialize the page's lru field,
    2797                 :            :          * but it does always reset its private field.
    2798                 :            :          */
    2799         [ #  # ]:          0 :         if (!page_private(head)) {
    2800         [ #  # ]:          0 :                 BUG_ON(count & COUNT_CONTINUED);
    2801                 :          0 :                 INIT_LIST_HEAD(&head->lru);
    2802                 :          0 :                 set_page_private(head, SWP_CONTINUED);
    2803                 :          0 :                 si->flags |= SWP_CONTINUED;
    2804                 :            :         }
    2805                 :            : 
    2806         [ #  # ]:          0 :         list_for_each_entry(list_page, &head->lru, lru) {
    2807                 :            :                 unsigned char *map;
    2808                 :            : 
    2809                 :            :                 /*
    2810                 :            :                  * If the previous map said no continuation, but we've found
    2811                 :            :                  * a continuation page, free our allocation and use this one.
    2812                 :            :                  */
    2813         [ #  # ]:          0 :                 if (!(count & COUNT_CONTINUED))
    2814                 :            :                         goto out;
    2815                 :            : 
    2816                 :          0 :                 map = kmap_atomic(list_page) + offset;
    2817                 :          0 :                 count = *map;
    2818                 :          0 :                 kunmap_atomic(map);
    2819                 :            : 
    2820                 :            :                 /*
    2821                 :            :                  * If this continuation count now has some space in it,
    2822                 :            :                  * free our allocation and use this one.
    2823                 :            :                  */
    2824         [ #  # ]:          0 :                 if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX)
    2825                 :            :                         goto out;
    2826                 :            :         }
    2827                 :            : 
    2828                 :          0 :         list_add_tail(&page->lru, &head->lru);
    2829                 :            :         page = NULL;                    /* now it's attached, don't free it */
    2830                 :            : out:
    2831                 :            :         spin_unlock(&si->lock);
    2832                 :            : outer:
    2833         [ #  # ]:          0 :         if (page)
    2834                 :          0 :                 __free_page(page);
    2835                 :            :         return 0;
    2836                 :            : }
    2837                 :            : 
    2838                 :            : /*
    2839                 :            :  * swap_count_continued - when the original swap_map count is incremented
    2840                 :            :  * from SWAP_MAP_MAX, check if there is already a continuation page to carry
    2841                 :            :  * into, carry if so, or else fail until a new continuation page is allocated;
    2842                 :            :  * when the original swap_map count is decremented from 0 with continuation,
    2843                 :            :  * borrow from the continuation and report whether it still holds more.
    2844                 :            :  * Called while __swap_duplicate() or swap_entry_free() holds si->lock.
    2845                 :            :  */
    2846                 :          0 : static bool swap_count_continued(struct swap_info_struct *si,
    2847                 :            :                                  pgoff_t offset, unsigned char count)
    2848                 :            : {
    2849                 :            :         struct page *head;
    2850                 :            :         struct page *page;
    2851                 :            :         unsigned char *map;
    2852                 :            : 
    2853                 :          0 :         head = vmalloc_to_page(si->swap_map + offset);
    2854         [ #  # ]:          0 :         if (page_private(head) != SWP_CONTINUED) {
    2855         [ #  # ]:          0 :                 BUG_ON(count & COUNT_CONTINUED);
    2856                 :            :                 return false;           /* need to add count continuation */
    2857                 :            :         }
    2858                 :            : 
    2859                 :          0 :         offset &= ~PAGE_MASK;
    2860                 :          0 :         page = list_entry(head->lru.next, struct page, lru);
    2861                 :          0 :         map = kmap_atomic(page) + offset;
    2862                 :            : 
    2863         [ #  # ]:          0 :         if (count == SWAP_MAP_MAX)      /* initial increment from swap_map */
    2864                 :            :                 goto init_map;          /* jump over SWAP_CONT_MAX checks */
    2865                 :            : 
    2866         [ #  # ]:          0 :         if (count == (SWAP_MAP_MAX | COUNT_CONTINUED)) { /* incrementing */
    2867                 :            :                 /*
    2868                 :            :                  * Think of how you add 1 to 999
    2869                 :            :                  */
    2870         [ #  # ]:          0 :                 while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) {
    2871                 :          0 :                         kunmap_atomic(map);
    2872                 :          0 :                         page = list_entry(page->lru.next, struct page, lru);
    2873         [ #  # ]:          0 :                         BUG_ON(page == head);
    2874                 :          0 :                         map = kmap_atomic(page) + offset;
    2875                 :            :                 }
    2876         [ #  # ]:          0 :                 if (*map == SWAP_CONT_MAX) {
    2877                 :          0 :                         kunmap_atomic(map);
    2878                 :          0 :                         page = list_entry(page->lru.next, struct page, lru);
    2879         [ #  # ]:          0 :                         if (page == head)
    2880                 :            :                                 return false;   /* add count continuation */
    2881                 :          0 :                         map = kmap_atomic(page) + offset;
    2882                 :          0 : init_map:               *map = 0;               /* we didn't zero the page */
    2883                 :            :                 }
    2884                 :          0 :                 *map += 1;
    2885                 :          0 :                 kunmap_atomic(map);
    2886                 :          0 :                 page = list_entry(page->lru.prev, struct page, lru);
    2887         [ #  # ]:          0 :                 while (page != head) {
    2888                 :          0 :                         map = kmap_atomic(page) + offset;
    2889                 :          0 :                         *map = COUNT_CONTINUED;
    2890                 :          0 :                         kunmap_atomic(map);
    2891                 :          0 :                         page = list_entry(page->lru.prev, struct page, lru);
    2892                 :            :                 }
    2893                 :            :                 return true;                    /* incremented */
    2894                 :            : 
    2895                 :            :         } else {                                /* decrementing */
    2896                 :            :                 /*
    2897                 :            :                  * Think of how you subtract 1 from 1000
    2898                 :            :                  */
    2899         [ #  # ]:          0 :                 BUG_ON(count != COUNT_CONTINUED);
    2900         [ #  # ]:          0 :                 while (*map == COUNT_CONTINUED) {
    2901                 :          0 :                         kunmap_atomic(map);
    2902                 :          0 :                         page = list_entry(page->lru.next, struct page, lru);
    2903         [ #  # ]:          0 :                         BUG_ON(page == head);
    2904                 :          0 :                         map = kmap_atomic(page) + offset;
    2905                 :            :                 }
    2906         [ #  # ]:          0 :                 BUG_ON(*map == 0);
    2907                 :          0 :                 *map -= 1;
    2908         [ #  # ]:          0 :                 if (*map == 0)
    2909                 :            :                         count = 0;
    2910                 :          0 :                 kunmap_atomic(map);
    2911                 :          0 :                 page = list_entry(page->lru.prev, struct page, lru);
    2912         [ #  # ]:          0 :                 while (page != head) {
    2913                 :          0 :                         map = kmap_atomic(page) + offset;
    2914                 :          0 :                         *map = SWAP_CONT_MAX | count;
    2915                 :            :                         count = COUNT_CONTINUED;
    2916                 :          0 :                         kunmap_atomic(map);
    2917                 :          0 :                         page = list_entry(page->lru.prev, struct page, lru);
    2918                 :            :                 }
    2919                 :          0 :                 return count == COUNT_CONTINUED;
    2920                 :            :         }
    2921                 :            : }
    2922                 :            : 
    2923                 :            : /*
    2924                 :            :  * free_swap_count_continuations - swapoff free all the continuation pages
    2925                 :            :  * appended to the swap_map, after swap_map is quiesced, before vfree'ing it.
    2926                 :            :  */
    2927                 :          0 : static void free_swap_count_continuations(struct swap_info_struct *si)
    2928                 :            : {
    2929                 :            :         pgoff_t offset;
    2930                 :            : 
    2931         [ #  # ]:          0 :         for (offset = 0; offset < si->max; offset += PAGE_SIZE) {
    2932                 :            :                 struct page *head;
    2933                 :          0 :                 head = vmalloc_to_page(si->swap_map + offset);
    2934         [ #  # ]:          0 :                 if (page_private(head)) {
    2935                 :            :                         struct list_head *this, *next;
    2936         [ #  # ]:          0 :                         list_for_each_safe(this, next, &head->lru) {
    2937                 :            :                                 struct page *page;
    2938                 :          0 :                                 page = list_entry(this, struct page, lru);
    2939                 :            :                                 list_del(this);
    2940                 :          0 :                                 __free_page(page);
    2941                 :            :                         }
    2942                 :            :                 }
    2943                 :            :         }
    2944                 :          0 : }

Generated by: LCOV version 1.9