Branch data Line data Source code
1 : : /*
2 : : * mm/rmap.c - physical to virtual reverse mappings
3 : : *
4 : : * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
5 : : * Released under the General Public License (GPL).
6 : : *
7 : : * Simple, low overhead reverse mapping scheme.
8 : : * Please try to keep this thing as modular as possible.
9 : : *
10 : : * Provides methods for unmapping each kind of mapped page:
11 : : * the anon methods track anonymous pages, and
12 : : * the file methods track pages belonging to an inode.
13 : : *
14 : : * Original design by Rik van Riel <riel@conectiva.com.br> 2001
15 : : * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
16 : : * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
17 : : * Contributions by Hugh Dickins 2003, 2004
18 : : */
19 : :
20 : : /*
21 : : * Lock ordering in mm:
22 : : *
23 : : * inode->i_mutex (while writing or truncating, not reading or faulting)
24 : : * mm->mmap_sem
25 : : * page->flags PG_locked (lock_page)
26 : : * mapping->i_mmap_mutex
27 : : * anon_vma->rwsem
28 : : * mm->page_table_lock or pte_lock
29 : : * zone->lru_lock (in mark_page_accessed, isolate_lru_page)
30 : : * swap_lock (in swap_duplicate, swap_info_get)
31 : : * mmlist_lock (in mmput, drain_mmlist and others)
32 : : * mapping->private_lock (in __set_page_dirty_buffers)
33 : : * inode->i_lock (in set_page_dirty's __mark_inode_dirty)
34 : : * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
35 : : * sb_lock (within inode_lock in fs/fs-writeback.c)
36 : : * mapping->tree_lock (widely used, in set_page_dirty,
37 : : * in arch-dependent flush_dcache_mmap_lock,
38 : : * within bdi.wb->list_lock in __sync_single_inode)
39 : : *
40 : : * anon_vma->rwsem,mapping->i_mutex (memory_failure, collect_procs_anon)
41 : : * ->tasklist_lock
42 : : * pte map lock
43 : : */
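/*
 * A minimal userspace sketch of the ordering discipline documented above
 * (illustrative only; mmap_lock_demo and pt_lock_demo are invented names,
 * not kernel locks). Every path acquires the outer lock before the inner
 * one, so no two threads can ever wait on each other's held lock.
 * Build with: cc -pthread lock_order_demo.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mmap_lock_demo = PTHREAD_MUTEX_INITIALIZER;	/* outer */
static pthread_mutex_t pt_lock_demo = PTHREAD_MUTEX_INITIALIZER;	/* inner */
static long shared_counter;

static void *touch_mapping(void *arg)
{
	/* Mirrors mmap_sem -> page_table_lock: outer first, then inner. */
	pthread_mutex_lock(&mmap_lock_demo);
	pthread_mutex_lock(&pt_lock_demo);
	shared_counter++;
	pthread_mutex_unlock(&pt_lock_demo);
	pthread_mutex_unlock(&mmap_lock_demo);
	return NULL;
}

int main(void)
{
	pthread_t t[2];
	int i;

	for (i = 0; i < 2; i++)
		pthread_create(&t[i], NULL, touch_mapping, NULL);
	for (i = 0; i < 2; i++)
		pthread_join(t[i], NULL);
	printf("counter = %ld\n", shared_counter);
	return 0;
}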
44 : :
45 : : #include <linux/mm.h>
46 : : #include <linux/pagemap.h>
47 : : #include <linux/swap.h>
48 : : #include <linux/swapops.h>
49 : : #include <linux/slab.h>
50 : : #include <linux/init.h>
51 : : #include <linux/ksm.h>
52 : : #include <linux/rmap.h>
53 : : #include <linux/rcupdate.h>
54 : : #include <linux/export.h>
55 : : #include <linux/memcontrol.h>
56 : : #include <linux/mmu_notifier.h>
57 : : #include <linux/migrate.h>
58 : : #include <linux/hugetlb.h>
59 : : #include <linux/backing-dev.h>
60 : :
61 : : #include <asm/tlbflush.h>
62 : :
63 : : #include "internal.h"
64 : :
65 : : static struct kmem_cache *anon_vma_cachep;
66 : : static struct kmem_cache *anon_vma_chain_cachep;
67 : :
68 : : static inline struct anon_vma *anon_vma_alloc(void)
69 : : {
70 : : struct anon_vma *anon_vma;
71 : :
72 : 13399410 : anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
73 [ + + ]: 13399428 : if (anon_vma) {
74 : 13399433 : atomic_set(&anon_vma->refcount, 1);
75 : : /*
76 : : * Initialise the anon_vma root to point to itself. If called
 77 : : * from fork, the root will be reset to the parent's anon_vma.
78 : : */
79 : 13399433 : anon_vma->root = anon_vma;
80 : : }
81 : :
82 : : return anon_vma;
83 : : }
84 : :
85 : 1664 : static inline void anon_vma_free(struct anon_vma *anon_vma)
86 : : {
87 : : VM_BUG_ON(atomic_read(&anon_vma->refcount));
88 : :
89 : : /*
90 : : * Synchronize against page_lock_anon_vma_read() such that
91 : : * we can safely hold the lock without the anon_vma getting
92 : : * freed.
93 : : *
94 : : * Relies on the full mb implied by the atomic_dec_and_test() from
95 : : * put_anon_vma() against the acquire barrier implied by
96 : : * down_read_trylock() from page_lock_anon_vma_read(). This orders:
97 : : *
98 : : * page_lock_anon_vma_read() VS put_anon_vma()
99 : : * down_read_trylock() atomic_dec_and_test()
100 : : * LOCK MB
101 : : * atomic_read() rwsem_is_locked()
102 : : *
103 : : * LOCK should suffice since the actual taking of the lock must
104 : : * happen _before_ what follows.
105 : : */
106 [ # # ][ + + ]: 13399441 : if (rwsem_is_locked(&anon_vma->root->rwsem)) {
107 : : anon_vma_lock_write(anon_vma);
108 : : anon_vma_unlock_write(anon_vma);
109 : : }
110 : :
111 : 13399433 : kmem_cache_free(anon_vma_cachep, anon_vma);
112 : : }
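/*
 * Rough userspace analogue of the "drain the lock before freeing" idiom in
 * anon_vma_free() above (illustrative; struct obj and obj_put() are invented
 * names, and the kernel additionally relies on SLAB_DESTROY_BY_RCU on the
 * reader side). A reader that won a trylock may still be inside its critical
 * section when the last reference goes away; write-locking and immediately
 * unlocking waits that reader out before the memory is released.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	atomic_int refcount;
	pthread_rwlock_t lock;
};

static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcount, 1) != 1)
		return;			/* not the last reference */

	/* Last reference: wait out any reader still holding o->lock. */
	pthread_rwlock_wrlock(&o->lock);
	pthread_rwlock_unlock(&o->lock);

	pthread_rwlock_destroy(&o->lock);
	free(o);
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	if (!o)
		return 1;
	atomic_init(&o->refcount, 1);
	pthread_rwlock_init(&o->lock, NULL);
	obj_put(o);			/* drops the only reference */
	return 0;
}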
113 : :
114 : : static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
115 : : {
116 : 27434798 : return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
117 : : }
118 : :
119 : : static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
120 : : {
121 : 27434630 : kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
122 : : }
123 : :
124 : : static void anon_vma_chain_link(struct vm_area_struct *vma,
125 : : struct anon_vma_chain *avc,
126 : : struct anon_vma *anon_vma)
127 : : {
128 : 13194006 : avc->vma = vma;
129 : 13194006 : avc->anon_vma = anon_vma;
130 : 13194006 : list_add(&avc->same_vma, &vma->anon_vma_chain);
131 : 27434966 : anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
132 : : }
133 : :
134 : : /**
135 : : * anon_vma_prepare - attach an anon_vma to a memory region
136 : : * @vma: the memory region in question
137 : : *
138 : : * This makes sure the memory mapping described by 'vma' has
139 : : * an 'anon_vma' attached to it, so that we can associate the
140 : : * anonymous pages mapped into it with that anon_vma.
141 : : *
142 : : * The common case will be that we already have one, but if
143 : : * not we either need to find an adjacent mapping that we
144 : : * can re-use the anon_vma from (very common when the only
145 : : * reason for splitting a vma has been mprotect()), or we
146 : : * allocate a new one.
147 : : *
148 : : * Anon-vma allocations are very subtle, because we may have
149 : : * optimistically looked up an anon_vma in page_lock_anon_vma_read()
 150 : : * and that may actually touch the lock even in the newly
151 : : * allocated vma (it depends on RCU to make sure that the
152 : : * anon_vma isn't actually destroyed).
153 : : *
154 : : * As a result, we need to do proper anon_vma locking even
155 : : * for the new allocation. At the same time, we do not want
156 : : * to do any locking for the common case of already having
157 : : * an anon_vma.
158 : : *
159 : : * This must be called with the mmap_sem held for reading.
160 : : */
161 : 0 : int anon_vma_prepare(struct vm_area_struct *vma)
162 : : {
163 : 1410779 : struct anon_vma *anon_vma = vma->anon_vma;
164 : : struct anon_vma_chain *avc;
165 : :
166 : : might_sleep();
167 [ + + ]: 28850105 : if (unlikely(!anon_vma)) {
168 : 705396 : struct mm_struct *mm = vma->vm_mm;
169 : : struct anon_vma *allocated;
170 : :
171 : : avc = anon_vma_chain_alloc(GFP_KERNEL);
172 [ + + ]: 705380 : if (!avc)
173 : : goto out_enomem;
174 : :
175 : 705379 : anon_vma = find_mergeable_anon_vma(vma);
176 : : allocated = NULL;
177 [ + + ]: 705370 : if (!anon_vma) {
178 : : anon_vma = anon_vma_alloc();
179 [ + - ]: 703316 : if (unlikely(!anon_vma))
180 : : goto out_enomem_free_avc;
181 : : allocated = anon_vma;
182 : : }
183 : :
184 : : anon_vma_lock_write(anon_vma);
185 : : /* page_table_lock to protect against threads */
186 : : spin_lock(&mm->page_table_lock);
187 [ + + ]: 705393 : if (likely(!vma->anon_vma)) {
188 : 705391 : vma->anon_vma = anon_vma;
189 : : anon_vma_chain_link(vma, avc, anon_vma);
190 : : allocated = NULL;
191 : : avc = NULL;
192 : : }
193 : : spin_unlock(&mm->page_table_lock);
194 : : anon_vma_unlock_write(anon_vma);
195 : :
196 [ + + ]: 705400 : if (unlikely(allocated))
197 : : put_anon_vma(allocated);
198 [ + + ]: 705400 : if (unlikely(avc))
199 : : anon_vma_chain_free(avc);
200 : : }
201 : : return 0;
202 : :
203 : : out_enomem_free_avc:
204 : : anon_vma_chain_free(avc);
205 : : out_enomem:
206 : : return -ENOMEM;
207 : : }
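/*
 * Userspace sketch of the "allocate first, install under the lock, discard
 * the spare on a lost race" pattern used by anon_vma_prepare() above
 * (illustrative; struct ctx and ensure_state() are invented names).
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct state { int dummy; };

struct ctx {
	pthread_mutex_t lock;			/* plays the page_table_lock role */
	_Atomic(struct state *) state;		/* lazily set, like vma->anon_vma */
};

static int ensure_state(struct ctx *c)
{
	struct state *fresh;

	if (atomic_load(&c->state))		/* common case: already attached */
		return 0;

	fresh = malloc(sizeof(*fresh));		/* allocate outside the lock */
	if (!fresh)
		return -1;

	pthread_mutex_lock(&c->lock);
	if (!atomic_load(&c->state)) {		/* recheck under the lock */
		atomic_store(&c->state, fresh);
		fresh = NULL;
	}
	pthread_mutex_unlock(&c->lock);

	free(fresh);				/* lost the race: drop the spare */
	return 0;
}

int main(void)
{
	struct ctx c = { PTHREAD_MUTEX_INITIALIZER, NULL };

	if (ensure_state(&c))
		return 1;
	free(atomic_load(&c.state));
	return 0;
}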
208 : :
209 : : /*
210 : : * This is a useful helper function for locking the anon_vma root as
 211 : : * we traverse the vma->anon_vma_chain, looping over anon_vmas that
212 : : * have the same vma.
213 : : *
 214 : : * Such anon_vmas should have the same root, so you'd expect to see
 215 : : * just a single lock acquisition for the whole traversal.
216 : : */
217 : : static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
218 : : {
219 : : struct anon_vma *new_root = anon_vma->root;
220 [ + + ][ + + ]: 41467011 : if (new_root != root) {
221 [ - + ][ # # ]: 27948287 : if (WARN_ON_ONCE(root))
[ - + ][ - + ]
[ - + ][ # # ]
[ - ][ - + ]
222 : 0 : up_write(&root->rwsem);
223 : : root = new_root;
224 : 27948207 : down_write(&root->rwsem);
225 : : }
226 : : return root;
227 : : }
228 : :
229 : : static inline void unlock_anon_vma_root(struct anon_vma *root)
230 : : {
231 [ + + ][ # # ]: 37555705 : if (root)
[ + + ]
232 : 27948647 : up_write(&root->rwsem);
233 : : }
234 : :
235 : : /*
236 : : * Attach the anon_vmas from src to dst.
237 : : * Returns 0 on success, -ENOMEM on failure.
238 : : */
239 : 0 : int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
240 : : {
241 : : struct anon_vma_chain *avc, *pavc;
242 : : struct anon_vma *root = NULL;
243 : :
244 [ + + ]: 28274220 : list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
245 : 14033331 : struct anon_vma *anon_vma;
246 : :
247 : : avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
248 [ - + ]: 14033331 : if (unlikely(!avc)) {
249 : : unlock_anon_vma_root(root);
250 : : root = NULL;
251 : : avc = anon_vma_chain_alloc(GFP_KERNEL);
252 [ # # ]: 0 : if (!avc)
253 : : goto enomem_failure;
254 : : }
255 : 14033331 : anon_vma = pavc->anon_vma;
256 : : root = lock_anon_vma_root(root, anon_vma);
257 : : anon_vma_chain_link(dst, avc, anon_vma);
258 : : }
259 : : unlock_anon_vma_root(root);
260 : : return 0;
261 : :
262 : : enomem_failure:
263 : 0 : unlink_anon_vmas(dst);
264 : 0 : return -ENOMEM;
265 : : }
266 : :
267 : : /*
268 : : * Attach vma to its own anon_vma, as well as to the anon_vmas that
269 : : * the corresponding VMA in the parent process is attached to.
270 : : * Returns 0 on success, non-zero on failure.
271 : : */
272 : 0 : int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
273 : : {
274 : : struct anon_vma_chain *avc;
275 : 25392232 : struct anon_vma *anon_vma;
276 : :
277 : : /* Don't bother if the parent process has no anon_vma here. */
278 [ + + ]: 19102891 : if (!pvma->anon_vma)
279 : : return 0;
280 : :
281 : : /*
282 : : * First, attach the new VMA to the parent VMA's anon_vmas,
283 : : * so rmap can find non-COWed pages in child processes.
284 : : */
285 [ + + ]: 12696114 : if (anon_vma_clone(vma, pvma))
286 : : return -ENOMEM;
287 : :
288 : : /* Then add our own anon_vma. */
289 : : anon_vma = anon_vma_alloc();
290 [ + ]: 12696112 : if (!anon_vma)
291 : : goto out_error;
292 : : avc = anon_vma_chain_alloc(GFP_KERNEL);
293 [ + - ]: 12696118 : if (!avc)
294 : : goto out_error_free_anon_vma;
295 : :
296 : : /*
297 : : * The root anon_vma's spinlock is the lock actually used when we
298 : : * lock any of the anon_vmas in this anon_vma tree.
299 : : */
300 : 12696118 : anon_vma->root = pvma->anon_vma->root;
301 : : /*
302 : : * With refcounts, an anon_vma can stay around longer than the
303 : : * process it belongs to. The root anon_vma needs to be pinned until
304 : : * this anon_vma is freed, because the lock lives in the root.
305 : : */
306 : : get_anon_vma(anon_vma->root);
307 : : /* Mark this anon_vma as the one where our new (COWed) pages go. */
308 : 12696121 : vma->anon_vma = anon_vma;
309 : : anon_vma_lock_write(anon_vma);
310 : : anon_vma_chain_link(vma, avc, anon_vma);
311 : : anon_vma_unlock_write(anon_vma);
312 : :
313 : 12696112 : return 0;
314 : :
315 : : out_error_free_anon_vma:
316 : : put_anon_vma(anon_vma);
317 : : out_error:
318 : 0 : unlink_anon_vmas(vma);
319 : 0 : return -ENOMEM;
320 : : }
321 : :
322 : 0 : void unlink_anon_vmas(struct vm_area_struct *vma)
323 : : {
324 : : struct anon_vma_chain *avc, *next;
325 : : struct anon_vma *root = NULL;
326 : :
327 : : /*
328 : : * Unlink each anon_vma chained to the VMA. This list is ordered
329 : : * from newest to oldest, ensuring the root anon_vma gets freed last.
330 : : */
331 [ + + ]: 50748451 : list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
332 : 27433680 : struct anon_vma *anon_vma = avc->anon_vma;
333 : :
334 : : root = lock_anon_vma_root(root, anon_vma);
335 : 27434078 : anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
336 : :
337 : : /*
338 : : * Leave empty anon_vmas on the list - we'll need
339 : : * to free them outside the lock.
340 : : */
341 [ + + ]: 27434608 : if (RB_EMPTY_ROOT(&anon_vma->rb_root))
342 : 13399420 : continue;
343 : :
344 : : list_del(&avc->same_vma);
345 : : anon_vma_chain_free(avc);
346 : : }
347 : : unlock_anon_vma_root(root);
348 : :
349 : : /*
 350 : : * Iterate the list once more; it now contains only empty and unlinked
 351 : : * anon_vmas, so destroy them. This could not be done earlier because
 352 : : * __put_anon_vma() needs to write-acquire the anon_vma->root->rwsem.
353 : : */
354 [ + + ]: 60028894 : list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
355 : 13399416 : struct anon_vma *anon_vma = avc->anon_vma;
356 : :
357 : : put_anon_vma(anon_vma);
358 : :
359 : : list_del(&avc->same_vma);
360 : : anon_vma_chain_free(avc);
361 : : }
362 : 23314890 : }
363 : :
364 : 0 : static void anon_vma_ctor(void *data)
365 : : {
366 : : struct anon_vma *anon_vma = data;
367 : :
368 : 12995 : init_rwsem(&anon_vma->rwsem);
369 : 12995 : atomic_set(&anon_vma->refcount, 0);
370 : 12995 : anon_vma->rb_root = RB_ROOT;
371 : 12995 : }
372 : :
373 : 0 : void __init anon_vma_init(void)
374 : : {
375 : 0 : anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
376 : : 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
377 : 0 : anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
378 : 0 : }
379 : :
380 : : /*
381 : : * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
382 : : *
 383 : : * Since there is no serialization whatsoever against page_remove_rmap()
384 : : * the best this function can do is return a locked anon_vma that might
385 : : * have been relevant to this page.
386 : : *
387 : : * The page might have been remapped to a different anon_vma or the anon_vma
388 : : * returned may already be freed (and even reused).
389 : : *
390 : : * In case it was remapped to a different anon_vma, the new anon_vma will be a
391 : : * child of the old anon_vma, and the anon_vma lifetime rules will therefore
392 : : * ensure that any anon_vma obtained from the page will still be valid for as
393 : : * long as we observe page_mapped() [ hence all those page_mapped() tests ].
394 : : *
395 : : * All users of this function must be very careful when walking the anon_vma
396 : : * chain and verify that the page in question is indeed mapped in it
397 : : * [ something equivalent to page_mapped_in_vma() ].
398 : : *
399 : : * Since anon_vma's slab is DESTROY_BY_RCU and we know from page_remove_rmap()
400 : : * that the anon_vma pointer from page->mapping is valid if there is a
401 : : * mapcount, we can dereference the anon_vma after observing those.
402 : : */
403 : 0 : struct anon_vma *page_get_anon_vma(struct page *page)
404 : : {
405 : : struct anon_vma *anon_vma = NULL;
406 : : unsigned long anon_mapping;
407 : :
408 : : rcu_read_lock();
409 : 0 : anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
410 [ # # ]: 0 : if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
411 : : goto out;
412 [ # # ]: 0 : if (!page_mapped(page))
413 : : goto out;
414 : :
415 : 0 : anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
416 [ # # ]: 0 : if (!atomic_inc_not_zero(&anon_vma->refcount)) {
417 : : anon_vma = NULL;
418 : : goto out;
419 : : }
420 : :
421 : : /*
422 : : * If this page is still mapped, then its anon_vma cannot have been
423 : : * freed. But if it has been unmapped, we have no security against the
424 : : * anon_vma structure being freed and reused (for another anon_vma:
425 : : * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero()
426 : : * above cannot corrupt).
427 : : */
428 [ # # ]: 0 : if (!page_mapped(page)) {
429 : : put_anon_vma(anon_vma);
430 : : anon_vma = NULL;
431 : : }
432 : : out:
433 : : rcu_read_unlock();
434 : :
435 : 0 : return anon_vma;
436 : : }
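/*
 * Sketch of the atomic_inc_not_zero() idiom relied on above (illustrative;
 * in the kernel it only works together with RCU and SLAB_DESTROY_BY_RCU,
 * which keep the memory readable while the attempt is made). A reference is
 * taken only if the count has not already hit zero, so an object that is
 * being freed is never resurrected.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

static bool ref_get_not_zero(atomic_int *refcount)
{
	int old = atomic_load(refcount);

	while (old != 0) {
		/* On failure the CAS reloads 'old' and we retry. */
		if (atomic_compare_exchange_weak(refcount, &old, old + 1))
			return true;	/* reference taken */
	}
	return false;			/* already zero: caller must back off */
}

int main(void)
{
	atomic_int live = 2, dying = 0;

	assert(ref_get_not_zero(&live) && live == 3);
	assert(!ref_get_not_zero(&dying) && dying == 0);
	return 0;
}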
437 : :
438 : : /*
439 : : * Similar to page_get_anon_vma() except it locks the anon_vma.
440 : : *
 441 : : * It's a little more complex as it tries to keep the fast path to a single
 442 : : * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
 443 : : * reference like with page_get_anon_vma() and then block on the rwsem.
444 : : */
445 : 0 : struct anon_vma *page_lock_anon_vma_read(struct page *page)
446 : : {
447 : 0 : struct anon_vma *anon_vma = NULL;
448 : : struct anon_vma *root_anon_vma;
449 : : unsigned long anon_mapping;
450 : :
451 : : rcu_read_lock();
452 : 0 : anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
453 [ # # ]: 0 : if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
454 : : goto out;
455 [ # # ]: 0 : if (!page_mapped(page))
456 : : goto out;
457 : :
458 : 0 : anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
459 : 0 : root_anon_vma = ACCESS_ONCE(anon_vma->root);
460 [ # # ]: 0 : if (down_read_trylock(&root_anon_vma->rwsem)) {
461 : : /*
462 : : * If the page is still mapped, then this anon_vma is still
463 : : * its anon_vma, and holding the mutex ensures that it will
464 : : * not go away, see anon_vma_free().
465 : : */
466 [ # # ]: 0 : if (!page_mapped(page)) {
467 : 0 : up_read(&root_anon_vma->rwsem);
468 : : anon_vma = NULL;
469 : : }
470 : : goto out;
471 : : }
472 : :
 473 : : /* trylock failed, we've got to sleep */
474 [ # # ]: 0 : if (!atomic_inc_not_zero(&anon_vma->refcount)) {
475 : : anon_vma = NULL;
476 : : goto out;
477 : : }
478 : :
479 [ # # ]: 0 : if (!page_mapped(page)) {
480 : : put_anon_vma(anon_vma);
481 : : anon_vma = NULL;
482 : : goto out;
483 : : }
484 : :
 485 : : /* we pinned the anon_vma, it's safe to sleep */
486 : : rcu_read_unlock();
487 : : anon_vma_lock_read(anon_vma);
488 : :
489 [ # # ]: 0 : if (atomic_dec_and_test(&anon_vma->refcount)) {
490 : : /*
491 : : * Oops, we held the last refcount, release the lock
492 : : * and bail -- can't simply use put_anon_vma() because
493 : : * we'll deadlock on the anon_vma_lock_write() recursion.
494 : : */
495 : : anon_vma_unlock_read(anon_vma);
496 : 0 : __put_anon_vma(anon_vma);
497 : : anon_vma = NULL;
498 : : }
499 : :
500 : 0 : return anon_vma;
501 : :
502 : : out:
503 : : rcu_read_unlock();
504 : 0 : return anon_vma;
505 : : }
506 : :
507 : 0 : void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
508 : : {
509 : : anon_vma_unlock_read(anon_vma);
510 : 0 : }
511 : :
512 : : /*
513 : : * At what user virtual address is page expected in @vma?
514 : : */
515 : : static inline unsigned long
516 : : __vma_address(struct page *page, struct vm_area_struct *vma)
517 : : {
518 : : pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
519 : :
520 : : if (unlikely(is_vm_hugetlb_page(vma)))
521 : : pgoff = page->index << huge_page_order(page_hstate(page));
522 : :
523 : 281950 : return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
524 : : }
525 : :
526 : : inline unsigned long
527 : 0 : vma_address(struct page *page, struct vm_area_struct *vma)
528 : : {
529 : : unsigned long address = __vma_address(page, vma);
530 : :
531 : : /* page should be within @vma mapping range */
532 : : VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
533 : :
534 : 0 : return address;
535 : : }
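/*
 * Worked example of the address calculation in __vma_address() above
 * (illustrative values; PAGE_SHIFT_DEMO and demo_vma_address() are invented
 * stand-ins). A page at file index 'pgoff' maps at the VMA start plus its
 * distance, in pages, from the VMA's own starting file offset.
 */
#include <stdio.h>

#define PAGE_SHIFT_DEMO 12	/* 4 KiB pages */

static unsigned long demo_vma_address(unsigned long vm_start,
				      unsigned long vm_pgoff,
				      unsigned long pgoff)
{
	return vm_start + ((pgoff - vm_pgoff) << PAGE_SHIFT_DEMO);
}

int main(void)
{
	/* VMA mapped at 0x400000, starting at file page 16; page index 20. */
	printf("0x%lx\n", demo_vma_address(0x400000UL, 16, 20));
	/* prints 0x404000: four pages past the VMA start */
	return 0;
}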
536 : :
537 : : /*
538 : : * At what user virtual address is page expected in vma?
539 : : * Caller should check the page is actually part of the vma.
540 : : */
541 : 0 : unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
542 : : {
543 : : unsigned long address;
544 [ # # ]: 0 : if (PageAnon(page)) {
545 : : struct anon_vma *page__anon_vma = page_anon_vma(page);
546 : : /*
547 : : * Note: swapoff's unuse_vma() is more efficient with this
548 : : * check, and needs it to match anon_vma when KSM is active.
549 : : */
550 [ # # ][ # # ]: 0 : if (!vma->anon_vma || !page__anon_vma ||
[ # # ]
551 : 0 : vma->anon_vma->root != page__anon_vma->root)
552 : : return -EFAULT;
553 [ # # ][ # # ]: 0 : } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
554 [ # # ][ # # ]: 0 : if (!vma->vm_file ||
555 : 0 : vma->vm_file->f_mapping != page->mapping)
556 : : return -EFAULT;
557 : : } else
558 : : return -EFAULT;
559 : : address = __vma_address(page, vma);
560 [ # # ][ # # ]: 0 : if (unlikely(address < vma->vm_start || address >= vma->vm_end))
561 : : return -EFAULT;
562 : 0 : return address;
563 : : }
564 : :
565 : 0 : pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
566 : : {
567 : : pgd_t *pgd;
568 : : pud_t *pud;
569 : : pmd_t *pmd = NULL;
570 : :
571 : 281950 : pgd = pgd_offset(mm, address);
572 : : if (!pgd_present(*pgd))
573 : : goto out;
574 : :
575 : : pud = pud_offset(pgd, address);
576 : : if (!pud_present(*pud))
577 : : goto out;
578 : :
579 : : pmd = pmd_offset(pud, address);
580 [ # # ][ + + ]: 281950 : if (!pmd_present(*pmd))
[ # # ]
581 : : pmd = NULL;
582 : : out:
583 : 0 : return pmd;
584 : : }
585 : :
586 : : /*
587 : : * Check that @page is mapped at @address into @mm.
588 : : *
589 : : * If @sync is false, page_check_address may perform a racy check to avoid
590 : : * the page table lock when the pte is not present (helpful when reclaiming
591 : : * highly shared pages).
592 : : *
593 : : * On success returns with pte mapped and locked.
594 : : */
595 : 0 : pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
596 : : unsigned long address, spinlock_t **ptlp, int sync)
597 : : {
598 : 186308 : pmd_t *pmd;
599 : : pte_t *pte;
600 : : spinlock_t *ptl;
601 : :
602 : : if (unlikely(PageHuge(page))) {
603 : : /* when pud is not present, pte will be NULL */
604 : : pte = huge_pte_offset(mm, address);
605 : : if (!pte)
606 : : return NULL;
607 : :
608 : : ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
609 : : goto check;
610 : : }
611 : :
612 : : pmd = mm_find_pmd(mm, address);
613 [ + + ]: 281950 : if (!pmd)
614 : : return NULL;
615 : :
616 : : if (pmd_trans_huge(*pmd))
617 : : return NULL;
618 : :
619 : 229685 : pte = pte_offset_map(pmd, address);
620 : : /* Make a quick check before getting the lock */
621 [ + + ][ + + ]: 229686 : if (!sync && !pte_present(*pte)) {
622 : 43378 : pte_unmap(pte);
623 : 43378 : return NULL;
624 : : }
625 : :
626 : 186308 : ptl = pte_lockptr(mm, pmd);
627 : : check:
628 : : spin_lock(ptl);
629 [ + + ][ + + ]: 186308 : if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
630 : 173225 : *ptlp = ptl;
631 : 173225 : return pte;
632 : : }
633 : 13082 : pte_unmap_unlock(pte, ptl);
634 : 13082 : return NULL;
635 : : }
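/*
 * Sketch of the "quick unlocked check, then verify under the lock" pattern
 * used by __page_check_address() above when sync == 0 (illustrative; struct
 * slot and slot_check_and_lock() are invented). On a match the lock stays
 * held for the caller, mirroring how the kernel helper returns with the pte
 * lock held.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct slot {
	pthread_mutex_t lock;
	_Atomic unsigned long value;
};

static bool slot_check_and_lock(struct slot *s, unsigned long want, bool sync)
{
	/* Cheap racy peek: skip obviously empty slots without locking. */
	if (!sync && atomic_load(&s->value) == 0)
		return false;

	pthread_mutex_lock(&s->lock);
	if (atomic_load(&s->value) == want)
		return true;		/* caller unlocks when done */
	pthread_mutex_unlock(&s->lock);
	return false;
}

int main(void)
{
	struct slot s = { PTHREAD_MUTEX_INITIALIZER, 42 };

	if (slot_check_and_lock(&s, 42, false))
		pthread_mutex_unlock(&s.lock);
	return 0;
}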
636 : :
637 : : /**
638 : : * page_mapped_in_vma - check whether a page is really mapped in a VMA
639 : : * @page: the page to test
640 : : * @vma: the VMA to test
641 : : *
642 : : * Returns 1 if the page is mapped into the page tables of the VMA, 0
643 : : * if the page is not mapped into the page tables of this VMA. Only
644 : : * valid for normal file or anonymous VMAs.
645 : : */
646 : 0 : int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
647 : : {
648 : : unsigned long address;
649 : : pte_t *pte;
650 : : spinlock_t *ptl;
651 : :
652 : : address = __vma_address(page, vma);
653 [ # # ][ # # ]: 0 : if (unlikely(address < vma->vm_start || address >= vma->vm_end))
654 : : return 0;
655 : 0 : pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
656 [ # # ]: 0 : if (!pte) /* the page is not in this mm */
657 : : return 0;
658 : 0 : pte_unmap_unlock(pte, ptl);
659 : :
660 : 0 : return 1;
661 : : }
662 : :
663 : : /*
664 : : * Subfunctions of page_referenced: page_referenced_one called
665 : : * repeatedly from either page_referenced_anon or page_referenced_file.
666 : : */
667 : 0 : int page_referenced_one(struct page *page, struct vm_area_struct *vma,
668 : : unsigned long address, unsigned int *mapcount,
669 : : unsigned long *vm_flags)
670 : : {
671 : 35537 : struct mm_struct *mm = vma->vm_mm;
672 : : spinlock_t *ptl;
673 : : int referenced = 0;
674 : :
675 : : if (unlikely(PageTransHuge(page))) {
676 : : pmd_t *pmd;
677 : :
678 : : /*
679 : : * rmap might return false positives; we must filter
680 : : * these out using page_check_address_pmd().
681 : : */
682 : : pmd = page_check_address_pmd(page, mm, address,
683 : : PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
684 : : if (!pmd)
685 : : goto out;
686 : :
687 : : if (vma->vm_flags & VM_LOCKED) {
688 : : spin_unlock(ptl);
689 : : *mapcount = 0; /* break early from loop */
690 : : *vm_flags |= VM_LOCKED;
691 : : goto out;
692 : : }
693 : :
694 : : /* go ahead even if the pmd is pmd_trans_splitting() */
695 : : if (pmdp_clear_flush_young_notify(vma, address, pmd))
696 : : referenced++;
697 : : spin_unlock(ptl);
698 : : } else {
699 : : pte_t *pte;
700 : :
701 : : /*
702 : : * rmap might return false positives; we must filter
703 : : * these out using page_check_address().
704 : : */
705 : : pte = page_check_address(page, mm, address, &ptl, 0);
706 [ + + ]: 35538 : if (!pte)
707 : : goto out;
708 : :
709 [ - + ]: 17387 : if (vma->vm_flags & VM_LOCKED) {
710 : 0 : pte_unmap_unlock(pte, ptl);
711 : 0 : *mapcount = 0; /* break early from loop */
712 : 0 : *vm_flags |= VM_LOCKED;
713 : 0 : goto out;
714 : : }
715 : :
716 [ + + ]: 17387 : if (ptep_clear_flush_young_notify(vma, address, pte)) {
717 : : /*
718 : : * Don't treat a reference through a sequentially read
719 : : * mapping as such. If the page has been used in
720 : : * another mapping, we will catch it; if this other
721 : : * mapping is already gone, the unmap path will have
722 : : * set PG_referenced or activated the page.
723 : : */
724 [ + - ]: 5347 : if (likely(!(vma->vm_flags & VM_SEQ_READ)))
725 : : referenced++;
726 : : }
727 : 17390 : pte_unmap_unlock(pte, ptl);
728 : : }
729 : :
730 : 17391 : (*mapcount)--;
731 : :
732 [ + + ]: 17391 : if (referenced)
733 : 5348 : *vm_flags |= vma->vm_flags;
734 : : out:
735 : 35542 : return referenced;
736 : : }
737 : :
738 : 0 : static int page_referenced_anon(struct page *page,
739 : : struct mem_cgroup *memcg,
740 : : unsigned long *vm_flags)
741 : : {
742 : : unsigned int mapcount;
743 : : struct anon_vma *anon_vma;
744 : : pgoff_t pgoff;
745 : : struct anon_vma_chain *avc;
746 : : int referenced = 0;
747 : :
748 : 0 : anon_vma = page_lock_anon_vma_read(page);
749 [ # # ]: 0 : if (!anon_vma)
750 : : return referenced;
751 : :
752 : 0 : mapcount = page_mapcount(page);
753 : 0 : pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
754 [ # # ]: 0 : anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
755 : 0 : struct vm_area_struct *vma = avc->vma;
756 : : unsigned long address = vma_address(page, vma);
757 : : /*
758 : : * If we are reclaiming on behalf of a cgroup, skip
759 : : * counting on behalf of references from different
760 : : * cgroups
761 : : */
762 : : if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
763 : : continue;
764 : 0 : referenced += page_referenced_one(page, vma, address,
765 : : &mapcount, vm_flags);
766 [ # # ]: 0 : if (!mapcount)
767 : : break;
768 : : }
769 : :
770 : : page_unlock_anon_vma_read(anon_vma);
771 : : return referenced;
772 : : }
773 : :
774 : : /**
775 : : * page_referenced_file - referenced check for object-based rmap
776 : : * @page: the page we're checking references on.
777 : : * @memcg: target memory control group
 778 : : * @vm_flags: collect encountered vma->vm_flags which actually referenced the page
779 : : *
780 : : * For an object-based mapped page, find all the places it is mapped and
781 : : * check/clear the referenced flag. This is done by following the page->mapping
782 : : * pointer, then walking the chain of vmas it holds. It returns the number
783 : : * of references it found.
784 : : *
785 : : * This function is only called from page_referenced for object-based pages.
786 : : */
787 : 0 : static int page_referenced_file(struct page *page,
788 : : struct mem_cgroup *memcg,
789 : : unsigned long *vm_flags)
790 : : {
791 : : unsigned int mapcount;
792 : 4213 : struct address_space *mapping = page->mapping;
793 : 4213 : pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
794 : : struct vm_area_struct *vma;
795 : : int referenced = 0;
796 : :
797 : : /*
798 : : * The caller's checks on page->mapping and !PageAnon have made
799 : : * sure that this is a file page: the check for page->mapping
800 : : * excludes the case just before it gets set on an anon page.
801 : : */
802 [ - + ]: 4213 : BUG_ON(PageAnon(page));
803 : :
804 : : /*
805 : : * The page lock not only makes sure that page->mapping cannot
806 : : * suddenly be NULLified by truncation, it makes sure that the
807 : : * structure at mapping cannot be freed and reused yet,
808 : : * so we can safely take mapping->i_mmap_mutex.
809 : : */
810 [ - + ]: 4213 : BUG_ON(!PageLocked(page));
811 : :
812 : 4213 : mutex_lock(&mapping->i_mmap_mutex);
813 : :
814 : : /*
815 : : * i_mmap_mutex does not stabilize mapcount at all, but mapcount
816 : : * is more likely to be accurate if we note it after spinning.
817 : : */
818 : 4213 : mapcount = page_mapcount(page);
819 : :
820 [ + - ]: 35538 : vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
821 : : unsigned long address = vma_address(page, vma);
822 : : /*
823 : : * If we are reclaiming on behalf of a cgroup, skip
824 : : * counting on behalf of references from different
825 : : * cgroups
826 : : */
827 : : if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
828 : : continue;
829 : 35539 : referenced += page_referenced_one(page, vma, address,
830 : : &mapcount, vm_flags);
831 [ + + ]: 35538 : if (!mapcount)
832 : : break;
833 : : }
834 : :
835 : 4213 : mutex_unlock(&mapping->i_mmap_mutex);
836 : 4213 : return referenced;
837 : : }
838 : :
839 : : /**
840 : : * page_referenced - test if the page was referenced
841 : : * @page: the page to test
842 : : * @is_locked: caller holds lock on the page
843 : : * @memcg: target memory cgroup
 844 : : * @vm_flags: collect encountered vma->vm_flags which actually referenced the page
845 : : *
846 : : * Quick test_and_clear_referenced for all mappings to a page,
847 : : * returns the number of ptes which referenced the page.
848 : : */
849 : 0 : int page_referenced(struct page *page,
850 : : int is_locked,
851 : : struct mem_cgroup *memcg,
852 : : unsigned long *vm_flags)
853 : : {
854 : : int referenced = 0;
855 : : int we_locked = 0;
856 : :
857 : 135000 : *vm_flags = 0;
858 [ + + ][ + - ]: 135000 : if (page_mapped(page) && page_rmapping(page)) {
859 [ + + ][ + - ]: 4213 : if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
860 : : we_locked = trylock_page(page);
861 [ + - ]: 2307 : if (!we_locked) {
862 : : referenced++;
863 : : goto out;
864 : : }
865 : : }
866 : : if (unlikely(PageKsm(page)))
867 : : referenced += page_referenced_ksm(page, memcg,
868 : : vm_flags);
869 [ - + ]: 4213 : else if (PageAnon(page))
870 : 0 : referenced += page_referenced_anon(page, memcg,
871 : : vm_flags);
872 [ + - ]: 4213 : else if (page->mapping)
873 : 4213 : referenced += page_referenced_file(page, memcg,
874 : : vm_flags);
875 [ + + ]: 4213 : if (we_locked)
876 : 2307 : unlock_page(page);
877 : : }
878 : : out:
879 : 0 : return referenced;
880 : : }
881 : :
882 : 0 : static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
883 : : unsigned long address)
884 : : {
885 : 187122 : struct mm_struct *mm = vma->vm_mm;
886 : : pte_t *pte;
887 : : spinlock_t *ptl;
888 : : int ret = 0;
889 : :
890 : : pte = page_check_address(page, mm, address, &ptl, 1);
891 [ + + ]: 187122 : if (!pte)
892 : : goto out;
893 : :
894 [ + + ]: 122110 : if (pte_dirty(*pte) || pte_write(*pte)) {
895 : : pte_t entry;
896 : :
897 : 109218 : flush_cache_page(vma, address, pte_pfn(*pte));
898 : 109218 : entry = ptep_clear_flush(vma, address, pte);
899 : : entry = pte_wrprotect(entry);
900 : : entry = pte_mkclean(entry);
901 : : set_pte_at(mm, address, pte, entry);
902 : : ret = 1;
903 : : }
904 : :
905 : 122110 : pte_unmap_unlock(pte, ptl);
906 : :
907 : : if (ret)
908 : : mmu_notifier_invalidate_page(mm, address);
909 : : out:
910 : 187122 : return ret;
911 : : }
912 : :
913 : 0 : static int page_mkclean_file(struct address_space *mapping, struct page *page)
914 : : {
915 : 124764 : pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
916 : : struct vm_area_struct *vma;
917 : : int ret = 0;
918 : :
919 [ - + ]: 124764 : BUG_ON(PageAnon(page));
920 : :
921 : 124764 : mutex_lock(&mapping->i_mmap_mutex);
922 [ + + ]: 349246 : vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
923 [ + + ]: 224481 : if (vma->vm_flags & VM_SHARED) {
924 : : unsigned long address = vma_address(page, vma);
925 : 187122 : ret += page_mkclean_one(page, vma, address);
926 : : }
927 : : }
928 : 124765 : mutex_unlock(&mapping->i_mmap_mutex);
929 : 124765 : return ret;
930 : : }
931 : :
932 : 0 : int page_mkclean(struct page *page)
933 : : {
934 : : int ret = 0;
935 : :
936 [ - + ]: 988625 : BUG_ON(!PageLocked(page));
937 : :
938 [ + + ]: 988625 : if (page_mapped(page)) {
939 : 124762 : struct address_space *mapping = page_mapping(page);
940 [ + - ]: 124764 : if (mapping)
941 : 124764 : ret = page_mkclean_file(mapping, page);
942 : : }
943 : :
944 : 3 : return ret;
945 : : }
946 : : EXPORT_SYMBOL_GPL(page_mkclean);
947 : :
948 : : /**
949 : : * page_move_anon_rmap - move a page to our anon_vma
950 : : * @page: the page to move to our anon_vma
951 : : * @vma: the vma the page belongs to
952 : : * @address: the user virtual address mapped
953 : : *
954 : : * When a page belongs exclusively to one process after a COW event,
955 : : * that page can be moved into the anon_vma that belongs to just that
956 : : * process, so the rmap code will not search the parent or sibling
957 : : * processes.
958 : : */
959 : 0 : void page_move_anon_rmap(struct page *page,
960 : : struct vm_area_struct *vma, unsigned long address)
961 : : {
962 : 7687618 : struct anon_vma *anon_vma = vma->anon_vma;
963 : :
964 : : VM_BUG_ON(!PageLocked(page));
965 : : VM_BUG_ON(!anon_vma);
966 : : VM_BUG_ON(page->index != linear_page_index(vma, address));
967 : :
968 : 7687618 : anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
969 : 7687618 : page->mapping = (struct address_space *) anon_vma;
970 : 7687618 : }
971 : :
972 : : /**
973 : : * __page_set_anon_rmap - set up new anonymous rmap
974 : : * @page: Page to add to rmap
975 : : * @vma: VM area to add page to.
976 : : * @address: User virtual address of the mapping
977 : : * @exclusive: the page is exclusively owned by the current process
978 : : */
979 : 0 : static void __page_set_anon_rmap(struct page *page,
980 : 26811044 : struct vm_area_struct *vma, unsigned long address, int exclusive)
981 : : {
982 : 26811635 : struct anon_vma *anon_vma = vma->anon_vma;
983 : :
984 [ - + ]: 26811635 : BUG_ON(!anon_vma);
985 : :
986 [ + ]: 26811635 : if (PageAnon(page))
987 : 0 : return;
988 : :
989 : : /*
990 : : * If the page isn't exclusively mapped into this vma,
991 : : * we must use the _oldest_ possible anon_vma for the
992 : : * page mapping!
993 : : */
994 [ - + ]: 53622679 : if (!exclusive)
995 : 0 : anon_vma = anon_vma->root;
996 : :
997 : 26811044 : anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
998 : 26811044 : page->mapping = (struct address_space *) anon_vma;
999 : 26811044 : page->index = linear_page_index(vma, address);
1000 : : }
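/*
 * Sketch of the pointer tagging done above with PAGE_MAPPING_ANON
 * (illustrative; the 0x1 tag matches the kernel's anon bit, but struct obj,
 * tag_anon() and untag() are invented). Because the tagged structures are
 * word-aligned, the low bit of the pointer stored in page->mapping is free
 * to say "this is an anon_vma, not an address_space".
 */
#include <assert.h>
#include <stdint.h>

#define ANON_TAG 0x1UL

struct obj { long payload; } __attribute__((aligned(4)));

static void *tag_anon(struct obj *o)
{
	return (void *)((uintptr_t)o + ANON_TAG);
}

static int is_anon(void *mapping_slot)
{
	return ((uintptr_t)mapping_slot & ANON_TAG) != 0;
}

static struct obj *untag(void *mapping_slot)
{
	return (struct obj *)((uintptr_t)mapping_slot & ~ANON_TAG);
}

int main(void)
{
	struct obj o = { 42 };
	void *slot = tag_anon(&o);

	assert(is_anon(slot));
	assert(untag(slot)->payload == 42);
	return 0;
}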
1001 : :
1002 : : /**
1003 : : * __page_check_anon_rmap - sanity check anonymous rmap addition
1004 : : * @page: the page to add the mapping to
1005 : : * @vma: the vm area in which the mapping is added
1006 : : * @address: the user virtual address mapped
1007 : : */
1008 : : static void __page_check_anon_rmap(struct page *page,
1009 : : struct vm_area_struct *vma, unsigned long address)
1010 : : {
1011 : : #ifdef CONFIG_DEBUG_VM
1012 : : /*
1013 : : * The page's anon-rmap details (mapping and index) are guaranteed to
1014 : : * be set up correctly at this point.
1015 : : *
1016 : : * We have exclusion against page_add_anon_rmap because the caller
1017 : : * always holds the page locked, except if called from page_dup_rmap,
 1018 : : * in which case the page is already known to be set up.
1019 : : *
1020 : : * We have exclusion against page_add_new_anon_rmap because those pages
1021 : : * are initially only visible via the pagetables, and the pte is locked
1022 : : * over the call to page_add_new_anon_rmap.
1023 : : */
1024 : : BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
1025 : : BUG_ON(page->index != linear_page_index(vma, address));
1026 : : #endif
1027 : : }
1028 : :
1029 : : /**
1030 : : * page_add_anon_rmap - add pte mapping to an anonymous page
1031 : : * @page: the page to add the mapping to
1032 : : * @vma: the vm area in which the mapping is added
1033 : : * @address: the user virtual address mapped
1034 : : *
1035 : : * The caller needs to hold the pte lock, and the page must be locked in
 1036 : : * the anon_vma case: to serialize mapping, index checking after setting,
1037 : : * and to ensure that PageAnon is not being upgraded racily to PageKsm
1038 : : * (but PageKsm is never downgraded to PageAnon).
1039 : : */
1040 : 0 : void page_add_anon_rmap(struct page *page,
1041 : : struct vm_area_struct *vma, unsigned long address)
1042 : : {
1043 : 0 : do_page_add_anon_rmap(page, vma, address, 0);
1044 : 0 : }
1045 : :
1046 : : /*
1047 : : * Special version of the above for do_swap_page, which often runs
1048 : : * into pages that are exclusively owned by the current process.
1049 : : * Everybody else should continue to use page_add_anon_rmap above.
1050 : : */
1051 : 0 : void do_page_add_anon_rmap(struct page *page,
1052 : : struct vm_area_struct *vma, unsigned long address, int exclusive)
1053 : : {
1054 : 0 : int first = atomic_inc_and_test(&page->_mapcount);
1055 [ # # ]: 0 : if (first) {
1056 : : if (PageTransHuge(page))
1057 : : __inc_zone_page_state(page,
1058 : : NR_ANON_TRANSPARENT_HUGEPAGES);
1059 : 0 : __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
1060 : : hpage_nr_pages(page));
1061 : : }
1062 : : if (unlikely(PageKsm(page)))
1063 : 0 : return;
1064 : :
1065 : : VM_BUG_ON(!PageLocked(page));
1066 : : /* address might be in next vma when migration races vma_adjust */
1067 [ # # ]: 0 : if (first)
1068 : 0 : __page_set_anon_rmap(page, vma, address, exclusive);
1069 : : else
1070 : : __page_check_anon_rmap(page, vma, address);
1071 : : }
1072 : :
1073 : : /**
1074 : : * page_add_new_anon_rmap - add pte mapping to a new anonymous page
1075 : : * @page: the page to add the mapping to
1076 : : * @vma: the vm area in which the mapping is added
1077 : : * @address: the user virtual address mapped
1078 : : *
1079 : : * Same as page_add_anon_rmap but must only be called on *new* pages.
1080 : : * This means the inc-and-test can be bypassed.
1081 : : * Page does not have to be locked.
1082 : : */
1083 : 0 : void page_add_new_anon_rmap(struct page *page,
1084 : 26811056 : struct vm_area_struct *vma, unsigned long address)
1085 : : {
1086 : : VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
1087 : : SetPageSwapBacked(page);
1088 : 26812196 : atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
1089 : : if (PageTransHuge(page))
1090 : : __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
1091 : 26812196 : __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
1092 : : hpage_nr_pages(page));
1093 : 26811903 : __page_set_anon_rmap(page, vma, address, 1);
1094 [ + + ]: 26811451 : if (!mlocked_vma_newpage(vma, page)) {
1095 : : SetPageActive(page);
1096 : 26804936 : lru_cache_add(page);
1097 : : } else
1098 : 6278 : add_page_to_unevictable_list(page);
1099 : 26810244 : }
1100 : :
1101 : : /**
1102 : : * page_add_file_rmap - add pte mapping to a file page
1103 : : * @page: the page to add the mapping to
1104 : : *
1105 : : * The caller needs to hold the pte lock.
1106 : : */
1107 : 0 : void page_add_file_rmap(struct page *page)
1108 : : {
1109 : : bool locked;
1110 : : unsigned long flags;
1111 : :
1112 : : mem_cgroup_begin_update_page_stat(page, &locked, &flags);
1113 [ + + ]: 40793145 : if (atomic_inc_and_test(&page->_mapcount)) {
1114 : 5404999 : __inc_zone_page_state(page, NR_FILE_MAPPED);
1115 : : mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
1116 : : }
1117 : : mem_cgroup_end_update_page_stat(page, &locked, &flags);
1118 : 0 : }
1119 : :
1120 : : /**
1121 : : * page_remove_rmap - take down pte mapping from a page
1122 : : * @page: page to remove mapping from
1123 : : *
1124 : : * The caller needs to hold the pte lock.
1125 : : */
1126 : 0 : void page_remove_rmap(struct page *page)
1127 : : {
1128 : : bool anon = PageAnon(page);
1129 : : bool locked;
1130 : : unsigned long flags;
1131 : :
1132 : : /*
 1133 : : * The anon case has no mem_cgroup page_stat to update, but it may call
 1134 : : * mem_cgroup_uncharge_page() below, where the lock ordering can deadlock
 1135 : : * if we hold the lock across a page_stat move: so avoid taking it for anon.
1136 : : */
1137 : : if (!anon)
1138 : : mem_cgroup_begin_update_page_stat(page, &locked, &flags);
1139 : :
1140 : : /* page still mapped by someone else? */
1141 [ + + ]: 94268323 : if (!atomic_add_negative(-1, &page->_mapcount))
1142 : : goto out;
1143 : :
1144 : : /*
1145 : : * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED
1146 : : * and not charged by memcg for now.
1147 : : */
1148 : : if (unlikely(PageHuge(page)))
1149 : : goto out;
1150 [ + + ]: 32216128 : if (anon) {
1151 : : mem_cgroup_uncharge_page(page);
1152 : : if (PageTransHuge(page))
1153 : : __dec_zone_page_state(page,
1154 : : NR_ANON_TRANSPARENT_HUGEPAGES);
1155 : 26810721 : __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
1156 : : -hpage_nr_pages(page));
1157 : : } else {
1158 : 5405407 : __dec_zone_page_state(page, NR_FILE_MAPPED);
1159 : : mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
1160 : : mem_cgroup_end_update_page_stat(page, &locked, &flags);
1161 : : }
1162 [ - + ]: 32207633 : if (unlikely(PageMlocked(page)))
1163 : 0 : clear_page_mlock(page);
1164 : : /*
1165 : : * It would be tidy to reset the PageAnon mapping here,
1166 : : * but that might overwrite a racing page_add_anon_rmap
1167 : : * which increments mapcount after us but sets mapping
1168 : : * before us: so leave the reset to free_hot_cold_page,
1169 : : * and remember that it's only reliable while mapped.
1170 : : * Leaving it set also helps swapoff to reinstate ptes
1171 : : * faster for those pages still in swapcache.
1172 : : */
1173 : 0 : return;
1174 : : out:
1175 : : if (!anon)
1176 : : mem_cgroup_end_update_page_stat(page, &locked, &flags);
1177 : : }
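/*
 * Sketch of the _mapcount convention used by the add/remove paths above
 * (illustrative; map_one() and unmap_one() are invented names): the counter
 * starts at -1, so the transition to 0 marks the first mapping and the
 * transition back below 0 marks the last unmap.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

static bool map_one(atomic_int *mapcount)
{
	/* like atomic_inc_and_test(): true when the new value is 0 */
	return atomic_fetch_add(mapcount, 1) + 1 == 0;
}

static bool unmap_one(atomic_int *mapcount)
{
	/* like atomic_add_negative(-1, ...): true when the new value is < 0 */
	return atomic_fetch_sub(mapcount, 1) - 1 < 0;
}

int main(void)
{
	atomic_int mapcount = -1;		/* freshly allocated page */

	assert(map_one(&mapcount));		/* first mapping: count stats */
	assert(!map_one(&mapcount));		/* second mapping: no-op */
	assert(!unmap_one(&mapcount));		/* still mapped elsewhere */
	assert(unmap_one(&mapcount));		/* last unmap: uncount stats */
	return 0;
}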
1178 : :
1179 : : /*
1180 : : * Subfunctions of try_to_unmap: try_to_unmap_one called
1181 : : * repeatedly from try_to_unmap_ksm, try_to_unmap_anon or try_to_unmap_file.
1182 : : */
1183 : 0 : int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
1184 : : unsigned long address, enum ttu_flags flags)
1185 : : {
1186 : 59289 : struct mm_struct *mm = vma->vm_mm;
1187 : : pte_t *pte;
1188 : : pte_t pteval;
1189 : : spinlock_t *ptl;
1190 : : int ret = SWAP_AGAIN;
1191 : :
1192 : : pte = page_check_address(page, mm, address, &ptl, 0);
1193 [ + + ]: 59289 : if (!pte)
1194 : : goto out;
1195 : :
1196 : : /*
1197 : : * If the page is mlock()d, we cannot swap it out.
1198 : : * If it's recently referenced (perhaps page_referenced
1199 : : * skipped over this mm) then we should reactivate it.
1200 : : */
1201 [ + - ]: 33725 : if (!(flags & TTU_IGNORE_MLOCK)) {
1202 [ + - ]: 33725 : if (vma->vm_flags & VM_LOCKED)
1203 : : goto out_mlock;
1204 : :
1205 [ + + ]: 33725 : if (TTU_ACTION(flags) == TTU_MUNLOCK)
1206 : : goto out_unmap;
1207 : : }
1208 [ + - ]: 5136 : if (!(flags & TTU_IGNORE_ACCESS)) {
1209 [ + - ]: 5136 : if (ptep_clear_flush_young_notify(vma, address, pte)) {
1210 : : ret = SWAP_FAIL;
1211 : : goto out_unmap;
1212 : : }
1213 : : }
1214 : :
1215 : : /* Nuke the page table entry. */
1216 : 5136 : flush_cache_page(vma, address, page_to_pfn(page));
1217 : 5136 : pteval = ptep_clear_flush(vma, address, pte);
1218 : :
1219 : : /* Move the dirty bit to the physical page now the pte is gone. */
1220 [ - + ]: 5136 : if (pte_dirty(pteval))
1221 : 0 : set_page_dirty(page);
1222 : :
1223 : : /* Update high watermark before we lower rss */
1224 : : update_hiwater_rss(mm);
1225 : :
1226 : : if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
1227 : : if (!PageHuge(page)) {
1228 : : if (PageAnon(page))
1229 : : dec_mm_counter(mm, MM_ANONPAGES);
1230 : : else
1231 : : dec_mm_counter(mm, MM_FILEPAGES);
1232 : : }
1233 : : set_pte_at(mm, address, pte,
1234 : : swp_entry_to_pte(make_hwpoison_entry(page)));
1235 [ - + ]: 64425 : } else if (PageAnon(page)) {
1236 : 0 : swp_entry_t entry = { .val = page_private(page) };
1237 : : pte_t swp_pte;
1238 : :
1239 [ # # ]: 0 : if (PageSwapCache(page)) {
1240 : : /*
1241 : : * Store the swap location in the pte.
1242 : : * See handle_pte_fault() ...
1243 : : */
1244 [ # # ]: 0 : if (swap_duplicate(entry) < 0) {
1245 : : set_pte_at(mm, address, pte, pteval);
1246 : : ret = SWAP_FAIL;
1247 : : goto out_unmap;
1248 : : }
1249 [ # # ]: 0 : if (list_empty(&mm->mmlist)) {
1250 : : spin_lock(&mmlist_lock);
1251 [ # # ]: 0 : if (list_empty(&mm->mmlist))
1252 : : list_add(&mm->mmlist, &init_mm.mmlist);
1253 : : spin_unlock(&mmlist_lock);
1254 : : }
1255 : : dec_mm_counter(mm, MM_ANONPAGES);
1256 : : inc_mm_counter(mm, MM_SWAPENTS);
1257 : : } else if (IS_ENABLED(CONFIG_MIGRATION)) {
1258 : : /*
1259 : : * Store the pfn of the page in a special migration
1260 : : * pte. do_swap_page() will wait until the migration
1261 : : * pte is removed and then restart fault handling.
1262 : : */
1263 [ # # ]: 0 : BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
1264 : 0 : entry = make_migration_entry(page, pte_write(pteval));
1265 : : }
1266 : : swp_pte = swp_entry_to_pte(entry);
1267 : : if (pte_soft_dirty(pteval))
1268 : : swp_pte = pte_swp_mksoft_dirty(swp_pte);
1269 : : set_pte_at(mm, address, pte, swp_pte);
1270 [ # # ]: 0 : BUG_ON(pte_file(*pte));
1271 [ - + ]: 5136 : } else if (IS_ENABLED(CONFIG_MIGRATION) &&
1272 : 5136 : (TTU_ACTION(flags) == TTU_MIGRATION)) {
1273 : : /* Establish migration entry for a file page */
1274 : : swp_entry_t entry;
1275 : 0 : entry = make_migration_entry(page, pte_write(pteval));
1276 : : set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
1277 : : } else
1278 : : dec_mm_counter(mm, MM_FILEPAGES);
1279 : :
1280 : 5136 : page_remove_rmap(page);
1281 : 5136 : page_cache_release(page);
1282 : :
1283 : : out_unmap:
1284 : 33725 : pte_unmap_unlock(pte, ptl);
1285 : : if (ret != SWAP_FAIL)
1286 : : mmu_notifier_invalidate_page(mm, address);
1287 : : out:
1288 : 59289 : return ret;
1289 : :
1290 : : out_mlock:
1291 : 0 : pte_unmap_unlock(pte, ptl);
1292 : :
1293 : :
1294 : : /*
 1295 : : * We need the mmap_sem for the VM_LOCKED check; without it the
 1296 : : * check is racy and the result unstable. We also can't wait here,
 1297 : : * because we now hold anon_vma->rwsem or mapping->i_mmap_mutex.
 1298 : : * If the trylock fails, the page remains on the evictable lru and
 1299 : : * vmscan can later retry moving it to the unevictable lru if the
 1300 : : * page is actually mlocked.
1301 : : */
1302 [ # # ]: 0 : if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
1303 [ # # ]: 0 : if (vma->vm_flags & VM_LOCKED) {
1304 : 0 : mlock_vma_page(page);
1305 : : ret = SWAP_MLOCK;
1306 : : }
1307 : 0 : up_read(&vma->vm_mm->mmap_sem);
1308 : : }
1309 : 0 : return ret;
1310 : : }
1311 : :
1312 : : /*
1313 : : * objrmap doesn't work for nonlinear VMAs because the assumption that
1314 : : * offset-into-file correlates with offset-into-virtual-addresses does not hold.
1315 : : * Consequently, given a particular page and its ->index, we cannot locate the
1316 : : * ptes which are mapping that page without an exhaustive linear search.
1317 : : *
1318 : : * So what this code does is a mini "virtual scan" of each nonlinear VMA which
1319 : : * maps the file to which the target page belongs. The ->vm_private_data field
1320 : : * holds the current cursor into that scan. Successive searches will circulate
1321 : : * around the vma's virtual address space.
1322 : : *
1323 : : * So as more replacement pressure is applied to the pages in a nonlinear VMA,
1324 : : * more scanning pressure is placed against them as well. Eventually pages
1325 : : * will become fully unmapped and are eligible for eviction.
1326 : : *
1327 : : * For very sparsely populated VMAs this is a little inefficient - chances are
 1328 : : * there won't be many ptes located within the scan cluster. In this case
1329 : : * maybe we could scan further - to the end of the pte page, perhaps.
1330 : : *
1331 : : * Mlocked pages: check VM_LOCKED under mmap_sem held for read, if we can
1332 : : * acquire it without blocking. If vma locked, mlock the pages in the cluster,
1333 : : * rather than unmapping them. If we encounter the "check_page" that vmscan is
1334 : : * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
1335 : : */
1336 : : #define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE)
1337 : : #define CLUSTER_MASK (~(CLUSTER_SIZE - 1))
1338 : :
1339 : 0 : static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
1340 : 0 : struct vm_area_struct *vma, struct page *check_page)
1341 : : {
1342 : 0 : struct mm_struct *mm = vma->vm_mm;
1343 : 0 : pmd_t *pmd;
1344 : : pte_t *pte;
1345 : : pte_t pteval;
1346 : : spinlock_t *ptl;
1347 : 0 : struct page *page;
1348 : : unsigned long address;
1349 : : unsigned long mmun_start; /* For mmu_notifiers */
1350 : : unsigned long mmun_end; /* For mmu_notifiers */
1351 : : unsigned long end;
1352 : : int ret = SWAP_AGAIN;
1353 : : int locked_vma = 0;
1354 : :
1355 : 0 : address = (vma->vm_start + cursor) & CLUSTER_MASK;
1356 : 0 : end = address + CLUSTER_SIZE;
1357 [ # # ]: 0 : if (address < vma->vm_start)
1358 : : address = vma->vm_start;
1359 [ # # ]: 0 : if (end > vma->vm_end)
1360 : : end = vma->vm_end;
1361 : :
1362 : : pmd = mm_find_pmd(mm, address);
1363 [ # # ]: 0 : if (!pmd)
1364 : : return ret;
1365 : :
1366 : : mmun_start = address;
1367 : : mmun_end = end;
1368 : : mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
1369 : :
1370 : : /*
1371 : : * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
1372 : : * keep the sem while scanning the cluster for mlocking pages.
1373 : : */
1374 [ # # ]: 0 : if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
1375 : 0 : locked_vma = (vma->vm_flags & VM_LOCKED);
1376 [ # # ]: 0 : if (!locked_vma)
1377 : 0 : up_read(&vma->vm_mm->mmap_sem); /* don't need it */
1378 : : }
1379 : :
1380 : 0 : pte = pte_offset_map_lock(mm, pmd, address, &ptl);
1381 : :
1382 : : /* Update high watermark before we lower rss */
1383 : : update_hiwater_rss(mm);
1384 : :
1385 [ # # ]: 0 : for (; address < end; pte++, address += PAGE_SIZE) {
1386 [ # # ]: 0 : if (!pte_present(*pte))
1387 : 0 : continue;
1388 : 0 : page = vm_normal_page(vma, address, *pte);
1389 [ # # ][ # # ]: 0 : BUG_ON(!page || PageAnon(page));
1390 : :
1391 [ # # ]: 0 : if (locked_vma) {
1392 : 0 : mlock_vma_page(page); /* no-op if already mlocked */
1393 [ # # ]: 0 : if (page == check_page)
1394 : : ret = SWAP_MLOCK;
1395 : 0 : continue; /* don't unmap */
1396 : : }
1397 : :
1398 [ # # ]: 0 : if (ptep_clear_flush_young_notify(vma, address, pte))
1399 : 0 : continue;
1400 : :
1401 : : /* Nuke the page table entry. */
1402 : 0 : flush_cache_page(vma, address, pte_pfn(*pte));
1403 : 0 : pteval = ptep_clear_flush(vma, address, pte);
1404 : :
1405 : : /* If nonlinear, store the file page offset in the pte. */
1406 [ # # ]: 0 : if (page->index != linear_page_index(vma, address)) {
1407 : 0 : pte_t ptfile = pgoff_to_pte(page->index);
1408 : : if (pte_soft_dirty(pteval))
1409 : : pte_file_mksoft_dirty(ptfile);
1410 : : set_pte_at(mm, address, pte, ptfile);
1411 : : }
1412 : :
1413 : : /* Move the dirty bit to the physical page now the pte is gone. */
1414 [ # # ]: 0 : if (pte_dirty(pteval))
1415 : 0 : set_page_dirty(page);
1416 : :
1417 : 0 : page_remove_rmap(page);
1418 : 0 : page_cache_release(page);
1419 : : dec_mm_counter(mm, MM_FILEPAGES);
1420 : 0 : (*mapcount)--;
1421 : : }
1422 : 0 : pte_unmap_unlock(pte - 1, ptl);
1423 : : mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
1424 [ # # ]: 0 : if (locked_vma)
1425 : 0 : up_read(&vma->vm_mm->mmap_sem);
1426 : 0 : return ret;
1427 : : }
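/*
 * Sketch of the circulating-cursor scan described in the comment above
 * try_to_unmap_cluster() (illustrative; DEMO_CLUSTER, struct region and
 * scan_window() are invented names). Each call works on one fixed-size
 * cluster starting at the saved cursor, then advances the cursor so that
 * successive calls walk the whole range and wrap around.
 */
#include <stdio.h>

#define DEMO_CLUSTER (32UL * 4096)	/* mirrors 32*PAGE_SIZE */

struct region {
	unsigned long start, end;	/* like vma->vm_start / vm_end */
	unsigned long cursor;		/* like vma->vm_private_data */
};

static void scan_window(struct region *r)
{
	unsigned long addr = r->start + r->cursor;
	unsigned long stop = addr + DEMO_CLUSTER;

	if (stop > r->end)
		stop = r->end;
	printf("scan [%#lx, %#lx)\n", addr, stop);

	r->cursor += DEMO_CLUSTER;
	if (r->start + r->cursor >= r->end)
		r->cursor = 0;		/* wrap: next call starts over */
}

int main(void)
{
	struct region r = { 0x100000, 0x100000 + 5 * DEMO_CLUSTER / 2, 0 };
	int i;

	for (i = 0; i < 4; i++)		/* successive reclaim passes */
		scan_window(&r);
	return 0;
}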
1428 : :
1429 : 0 : bool is_vma_temporary_stack(struct vm_area_struct *vma)
1430 : : {
1431 : 0 : int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
1432 : :
1433 [ # # ][ # # ]: 0 : if (!maybe_stack)
1434 : : return false;
1435 : :
1436 [ # # ][ # # ]: 0 : if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
1437 : : VM_STACK_INCOMPLETE_SETUP)
1438 : : return true;
1439 : :
1440 : 0 : return false;
1441 : : }
1442 : :
1443 : : /**
1444 : : * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
1445 : : * rmap method
1446 : : * @page: the page to unmap/unlock
1447 : : * @flags: action and flags
1448 : : *
1449 : : * Find all the mappings of a page using the mapping pointer and the vma chains
1450 : : * contained in the anon_vma struct it points to.
1451 : : *
1452 : : * This function is only called from try_to_unmap/try_to_munlock for
1453 : : * anonymous pages.
1454 : : * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
1455 : : * where the page was found will be held for write. So, we won't recheck
1456 : : * vm_flags for that VMA. That should be OK, because that vma shouldn't be
1457 : : * 'LOCKED.
1458 : : */
1459 : 0 : static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
1460 : : {
1461 : : struct anon_vma *anon_vma;
1462 : : pgoff_t pgoff;
1463 : : struct anon_vma_chain *avc;
1464 : : int ret = SWAP_AGAIN;
1465 : :
1466 : 0 : anon_vma = page_lock_anon_vma_read(page);
1467 [ # # ]: 0 : if (!anon_vma)
1468 : : return ret;
1469 : :
1470 : 0 : pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1471 [ # # ]: 0 : anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
1472 : 0 : struct vm_area_struct *vma = avc->vma;
1473 : : unsigned long address;
1474 : :
1475 : : /*
1476 : : * During exec, a temporary VMA is setup and later moved.
1477 : : * The VMA is moved under the anon_vma lock but not the
1478 : : * page tables leading to a race where migration cannot
1479 : : * find the migration ptes. Rather than increasing the
1480 : : * locking requirements of exec(), migration skips
1481 : : * temporary VMAs until after exec() completes.
1482 : : */
1483 [ # # ][ # # ]: 0 : if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
1484 : : is_vma_temporary_stack(vma))
1485 : 0 : continue;
1486 : :
1487 : : address = vma_address(page, vma);
1488 : 0 : ret = try_to_unmap_one(page, vma, address, flags);
1489 [ # # ][ # # ]: 0 : if (ret != SWAP_AGAIN || !page_mapped(page))
1490 : : break;
1491 : : }
1492 : :
1493 : : page_unlock_anon_vma_read(anon_vma);
1494 : 0 : return ret;
1495 : : }
1496 : :
1497 : : /**
1498 : : * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
1499 : : * @page: the page to unmap/unlock
1500 : : * @flags: action and flags
1501 : : *
1502 : : * Find all the mappings of a page using the mapping pointer and the vma chains
1503 : : * contained in the address_space struct it points to.
1504 : : *
1505 : : * This function is only called from try_to_unmap/try_to_munlock for
1506 : : * object-based pages.
1507 : : * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
1508 : : * where the page was found will be held for write. So, we won't recheck
1509 : : * vm_flags for that VMA. That should be OK, because that vma shouldn't be
 1510 : : * VM_LOCKED.
1511 : : */
1512 : 0 : static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1513 : : {
1514 : 3212 : struct address_space *mapping = page->mapping;
1515 : 3212 : pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1516 : : struct vm_area_struct *vma;
1517 : : int ret = SWAP_AGAIN;
1518 : : unsigned long cursor;
1519 : : unsigned long max_nl_cursor = 0;
1520 : : unsigned long max_nl_size = 0;
1521 : : unsigned int mapcount;
1522 : :
1523 : : if (PageHuge(page))
1524 : : pgoff = page->index << compound_order(page);
1525 : :
1526 : 3212 : mutex_lock(&mapping->i_mmap_mutex);
1527 [ + + ]: 61157 : vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
1528 : : unsigned long address = vma_address(page, vma);
1529 : 59289 : ret = try_to_unmap_one(page, vma, address, flags);
1530 [ + - ][ + + ]: 59289 : if (ret != SWAP_AGAIN || !page_mapped(page))
1531 : : goto out;
1532 : : }
1533 : :
1534 [ - + ]: 1868 : if (list_empty(&mapping->i_mmap_nonlinear))
1535 : : goto out;
1536 : :
1537 : : /*
1538 : : * We don't bother to try to find the munlocked page in nonlinears.
1539 : : * It's costly. Instead, later, page reclaim logic may call
1540 : : * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily.
1541 : : */
1542 [ # # ]: 0 : if (TTU_ACTION(flags) == TTU_MUNLOCK)
1543 : : goto out;
1544 : :
1545 [ # # ]: 0 : list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1546 : : shared.nonlinear) {
1547 : 0 : cursor = (unsigned long) vma->vm_private_data;
1548 [ # # ]: 0 : if (cursor > max_nl_cursor)
1549 : : max_nl_cursor = cursor;
1550 : 0 : cursor = vma->vm_end - vma->vm_start;
1551 [ # # ]: 0 : if (cursor > max_nl_size)
1552 : : max_nl_size = cursor;
1553 : : }
1554 : :
1555 [ # # ]: 0 : if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */
1556 : : ret = SWAP_FAIL;
1557 : : goto out;
1558 : : }
1559 : :
1560 : : /*
1561 : : * We don't try to search for this page in the nonlinear vmas,
1562 : : * and page_referenced wouldn't have found it anyway. Instead
1563 : : * just walk the nonlinear vmas trying to age and unmap some.
1564 : : * The mapcount of the page we came in with is irrelevant,
1565 : : * but even so we use it as a rough guide to how hard we should try.
1566 : : */
1567 : 0 : mapcount = page_mapcount(page);
1568 [ # # ]: 0 : if (!mapcount)
1569 : : goto out;
1570 : 0 : cond_resched();
1571 : :
1572 : 0 : max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
1573 [ # # ]: 0 : if (max_nl_cursor == 0)
1574 : : max_nl_cursor = CLUSTER_SIZE;
1575 : :
1576 : : do {
1577 [ # # ]: 0 : list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1578 : : shared.nonlinear) {
1579 : 0 : cursor = (unsigned long) vma->vm_private_data;
1580 [ # # ][ # # ]: 0 : while ( cursor < max_nl_cursor &&
1581 : 0 : cursor < vma->vm_end - vma->vm_start) {
1582 [ # # ]: 0 : if (try_to_unmap_cluster(cursor, &mapcount,
1583 : : vma, page) == SWAP_MLOCK)
1584 : : ret = SWAP_MLOCK;
1585 : 0 : cursor += CLUSTER_SIZE;
1586 : 0 : vma->vm_private_data = (void *) cursor;
1587 [ # # ]: 0 : if ((int)mapcount <= 0)
1588 : : goto out;
1589 : : }
1590 : 0 : vma->vm_private_data = (void *) max_nl_cursor;
1591 : : }
1592 : 0 : cond_resched();
1593 : 0 : max_nl_cursor += CLUSTER_SIZE;
1594 [ # # ]: 0 : } while (max_nl_cursor <= max_nl_size);
1595 : :
1596 : : /*
1597 : : * Don't loop forever (perhaps all the remaining pages are
1598 : : * in locked vmas). Reset cursor on all unreserved nonlinear
1599 : : * vmas, now forgetting on which ones it had fallen behind.
1600 : : */
1601 [ # # ]: 0 : list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
1602 : 0 : vma->vm_private_data = NULL;
1603 : : out:
1604 : 3212 : mutex_unlock(&mapping->i_mmap_mutex);
1605 : 3212 : return ret;
1606 : : }
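/*
 * Illustrative note (hypothetical numbers, not part of mm/rmap.c): the
 * nonlinear scan above rounds the largest nonlinear VMA size up to a whole
 * number of clusters with the usual power-of-two round-up idiom, assuming
 * CLUSTER_MASK is ~(CLUSTER_SIZE - 1) as defined earlier in this file.  With
 * 4KiB pages and a 32-page (0x20000 byte) cluster, for example:
 *
 *	max_nl_size = 0x21000;
 *	max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
 *	--> (0x21000 + 0x1ffff) & ~0x1ffff == 0x40000	(two clusters)
 *
 * Each pass of the do/while loop then advances every nonlinear VMA's cursor
 * (cached in vma->vm_private_data) by one cluster, until max_nl_cursor has
 * swept past max_nl_size or the page's mapcount drops to zero.
 */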
1607 : :
1608 : : /**
1609 : : * try_to_unmap - try to remove all page table mappings to a page
1610 : : * @page: the page to get unmapped
1611 : : * @flags: action and flags
1612 : : *
1613 : : * Tries to remove all the page table entries which are mapping this
1614 : : * page, used in the pageout path. Caller must hold the page lock.
1615 : : * Return values are:
1616 : : *
1617 : : * SWAP_SUCCESS - we succeeded in removing all mappings
1618 : : * SWAP_AGAIN - we missed a mapping, try again later
1619 : : * SWAP_FAIL - the page is unswappable
1620 : : * SWAP_MLOCK - page is mlocked.
1621 : : */
1622 : 0 : int try_to_unmap(struct page *page, enum ttu_flags flags)
1623 : : {
1624 : : int ret;
1625 : :
1626 [ - + ]: 1344 : BUG_ON(!PageLocked(page));
1627 : : VM_BUG_ON(!PageHuge(page) && PageTransHuge(page));
1628 : :
1629 : : if (unlikely(PageKsm(page)))
1630 : : ret = try_to_unmap_ksm(page, flags);
1631 [ - + ]: 1344 : else if (PageAnon(page))
1632 : 0 : ret = try_to_unmap_anon(page, flags);
1633 : : else
1634 : 1344 : ret = try_to_unmap_file(page, flags);
1635 [ + - ][ + - ]: 1344 : if (ret != SWAP_MLOCK && !page_mapped(page))
1636 : : ret = SWAP_SUCCESS;
1637 : 0 : return ret;
1638 : : }
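/*
 * Illustrative sketch (hypothetical, not part of mm/rmap.c): a reclaim-style
 * caller holds the page lock and dispatches on the return codes documented
 * above.  The function name and the errno mapping are made up for exposition;
 * TTU_UNMAP is the plain "unmap for reclaim" action from enum ttu_flags.
 */
static int example_unmap_for_reclaim(struct page *page)
{
	BUG_ON(!PageLocked(page));

	switch (try_to_unmap(page, TTU_UNMAP)) {
	case SWAP_SUCCESS:
		return 0;		/* every pte is gone; page can be freed */
	case SWAP_AGAIN:
		return -EAGAIN;		/* a mapping was missed; retry later */
	case SWAP_MLOCK:
		return -EPERM;		/* mlocked; keep the page resident */
	case SWAP_FAIL:
	default:
		return -EBUSY;		/* unswappable for now */
	}
}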
1639 : :
1640 : : /**
1641 : : * try_to_munlock - try to munlock a page
1642 : : * @page: the page to be munlocked
1643 : : *
1644 : : * Called from munlock code. Checks all of the VMAs mapping the page
1645 : : * to make sure nobody else has this page mlocked. The page will be
1646 : : * returned with PG_mlocked cleared if no other vmas have it mlocked.
1647 : : *
1648 : : * Return values are:
1649 : : *
1650 : : * SWAP_AGAIN - no vma is holding page mlocked, or,
1651 : : * SWAP_AGAIN - page mapped in mlocked vma -- couldn't acquire mmap sem
1652 : : * SWAP_FAIL - page cannot be located at present
1653 : : * SWAP_MLOCK - page is now mlocked.
1654 : : */
1655 : 0 : int try_to_munlock(struct page *page)
1656 : : {
1657 : : VM_BUG_ON(!PageLocked(page) || PageLRU(page));
1658 : :
1659 : : if (unlikely(PageKsm(page)))
1660 : : return try_to_unmap_ksm(page, TTU_MUNLOCK);
1661 [ - + ]: 1868 : else if (PageAnon(page))
1662 : 0 : return try_to_unmap_anon(page, TTU_MUNLOCK);
1663 : : else
1664 : 1868 : return try_to_unmap_file(page, TTU_MUNLOCK);
1665 : : }
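/*
 * Illustrative sketch (hypothetical, not part of mm/rmap.c): a munlock-path
 * caller only needs to know whether some other VMA still holds the page
 * mlocked, per the return codes documented above.  The page must be locked.
 */
static bool example_page_still_mlocked(struct page *page)
{
	/* SWAP_MLOCK means another VM_LOCKED vma re-asserted PG_mlocked */
	return try_to_munlock(page) == SWAP_MLOCK;
}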
1666 : :
1667 : 0 : void __put_anon_vma(struct anon_vma *anon_vma)
1668 : : {
1669 : 13399440 : struct anon_vma *root = anon_vma->root;
1670 : :
1671 [ + + - + ]: 26095557 : if (root != anon_vma && atomic_dec_and_test(&root->refcount))
1672 : : anon_vma_free(root);
1673 : :
1674 : : anon_vma_free(anon_vma);
1675 : 13399445 : }
1676 : :
1677 : : #ifdef CONFIG_MIGRATION
1678 : : /*
1679 : : * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
1680 : : * Called by migrate.c to remove migration ptes, but might be used more later.
1681 : : */
1682 : 0 : static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
1683 : : struct vm_area_struct *, unsigned long, void *), void *arg)
1684 : : {
1685 : 0 : struct anon_vma *anon_vma;
1686 : 0 : pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1687 : : struct anon_vma_chain *avc;
1688 : : int ret = SWAP_AGAIN;
1689 : :
1690 : : /*
1691 : : * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
1692 : : * because that depends on page_mapped(); but not all its usages
1693 : : * are holding mmap_sem. Users without mmap_sem are required to
1694 : : * take a reference count to prevent the anon_vma from disappearing.
1695 : : */
1696 : : anon_vma = page_anon_vma(page);
1697 [ # # ]: 0 : if (!anon_vma)
1698 : : return ret;
1699 : : anon_vma_lock_read(anon_vma);
1700 [ # # ]: 0 : anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
1701 : 0 : struct vm_area_struct *vma = avc->vma;
1702 : : unsigned long address = vma_address(page, vma);
1703 : 0 : ret = rmap_one(page, vma, address, arg);
1704 [ # # ]: 0 : if (ret != SWAP_AGAIN)
1705 : : break;
1706 : : }
1707 : : anon_vma_unlock_read(anon_vma);
1708 : 0 : return ret;
1709 : : }
1710 : :
1711 : 0 : static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
1712 : : struct vm_area_struct *, unsigned long, void *), void *arg)
1713 : : {
1714 : 0 : struct address_space *mapping = page->mapping;
1715 : 0 : pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1716 : : struct vm_area_struct *vma;
1717 : : int ret = SWAP_AGAIN;
1718 : :
1719 [ # # ]: 0 : if (!mapping)
1720 : : return ret;
1721 : 0 : mutex_lock(&mapping->i_mmap_mutex);
1722 [ # # ]: 0 : vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
1723 : : unsigned long address = vma_address(page, vma);
1724 : 0 : ret = rmap_one(page, vma, address, arg);
1725 [ # # ]: 0 : if (ret != SWAP_AGAIN)
1726 : : break;
1727 : : }
1728 : : /*
1729 : : * No nonlinear handling: being always shared, nonlinear vmas
1730 : : * never contain migration ptes. Decide what to do about this
1731 : : * restriction to linear vmas if and when rmap_walk() is needed on nonlinear ones.
1732 : : */
1733 : 0 : mutex_unlock(&mapping->i_mmap_mutex);
1734 : 0 : return ret;
1735 : : }
1736 : :
1737 : 0 : int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
1738 : : struct vm_area_struct *, unsigned long, void *), void *arg)
1739 : : {
1740 : : VM_BUG_ON(!PageLocked(page));
1741 : :
1742 : : if (unlikely(PageKsm(page)))
1743 : : return rmap_walk_ksm(page, rmap_one, arg);
1744 [ # # ]: 0 : else if (PageAnon(page))
1745 : 0 : return rmap_walk_anon(page, rmap_one, arg);
1746 : : else
1747 : 0 : return rmap_walk_file(page, rmap_one, arg);
1748 : : }
1749 : : #endif /* CONFIG_MIGRATION */
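/*
 * Illustrative sketch (hypothetical, not part of mm/rmap.c): rmap_walk() hands
 * every (vma, address) mapping of the page to the rmap_one callback and stops
 * as soon as the callback returns anything other than SWAP_AGAIN.  A minimal
 * made-up callback that counts the mappings through *arg:
 */
static int example_count_one_mapping(struct page *page,
		struct vm_area_struct *vma, unsigned long address, void *arg)
{
	(*(int *)arg)++;
	return SWAP_AGAIN;	/* keep walking the remaining VMAs */
}

/*
 * Hypothetical usage, with the page locked:
 *
 *	int nr = 0;
 *	rmap_walk(page, example_count_one_mapping, &nr);
 */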
1750 : :
1751 : : #ifdef CONFIG_HUGETLB_PAGE
1752 : : /*
1753 : : * The following three functions are for anonymous (private mapped) hugepages.
1754 : : * Unlike common anonymous pages, anonymous hugepages have no accounting code
1755 : : * and no lru code, because we handle hugepages differently from common pages.
1756 : : */
1757 : : static void __hugepage_set_anon_rmap(struct page *page,
1758 : : struct vm_area_struct *vma, unsigned long address, int exclusive)
1759 : : {
1760 : : struct anon_vma *anon_vma = vma->anon_vma;
1761 : :
1762 : : BUG_ON(!anon_vma);
1763 : :
1764 : : if (PageAnon(page))
1765 : : return;
1766 : : if (!exclusive)
1767 : : anon_vma = anon_vma->root;
1768 : :
1769 : : anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1770 : : page->mapping = (struct address_space *) anon_vma;
1771 : : page->index = linear_page_index(vma, address);
1772 : : }
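/*
 * Illustrative sketch (hypothetical, not part of mm/rmap.c): tagging the
 * anon_vma pointer with PAGE_MAPPING_ANON (bit 0) before storing it in
 * page->mapping, as done above, is what lets PageAnon() tell anonymous pages
 * apart from file-backed ones.  Decoding roughly mirrors what
 * page_anon_vma()/page_rmapping() do:
 */
static struct anon_vma *example_decode_anon_vma(struct page *page)
{
	unsigned long mapping = (unsigned long)page->mapping;

	if (!(mapping & PAGE_MAPPING_ANON))
		return NULL;	/* file-backed (or unmapped) page */
	return (struct anon_vma *)(mapping & ~PAGE_MAPPING_FLAGS);
}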
1773 : :
1774 : : void hugepage_add_anon_rmap(struct page *page,
1775 : : struct vm_area_struct *vma, unsigned long address)
1776 : : {
1777 : : struct anon_vma *anon_vma = vma->anon_vma;
1778 : : int first;
1779 : :
1780 : : BUG_ON(!PageLocked(page));
1781 : : BUG_ON(!anon_vma);
1782 : : /* address might be in next vma when migration races vma_adjust */
1783 : : first = atomic_inc_and_test(&page->_mapcount);
1784 : : if (first)
1785 : : __hugepage_set_anon_rmap(page, vma, address, 0);
1786 : : }
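/*
 * Illustrative note (not part of mm/rmap.c): _mapcount starts at -1 for an
 * unmapped page, so the atomic_inc_and_test() above returns true only for the
 * very first mapping (-1 -> 0), and hugepage_add_new_anon_rmap() below can
 * simply set it to 0.  The visible count is offset by one:
 *
 *	page_mapcount(page) == atomic_read(&page->_mapcount) + 1
 */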
1787 : :
1788 : : void hugepage_add_new_anon_rmap(struct page *page,
1789 : : struct vm_area_struct *vma, unsigned long address)
1790 : : {
1791 : : BUG_ON(address < vma->vm_start || address >= vma->vm_end);
1792 : : atomic_set(&page->_mapcount, 0);
1793 : : __hugepage_set_anon_rmap(page, vma, address, 1);
1794 : : }
1795 : : #endif /* CONFIG_HUGETLB_PAGE */
|