Branch data Line data Source code
1 : : /*
2 : : * linux/mm/mlock.c
3 : : *
4 : : * (C) Copyright 1995 Linus Torvalds
5 : : * (C) Copyright 2002 Christoph Hellwig
6 : : */
7 : :
8 : : #include <linux/capability.h>
9 : : #include <linux/mman.h>
10 : : #include <linux/mm.h>
11 : : #include <linux/swap.h>
12 : : #include <linux/swapops.h>
13 : : #include <linux/pagemap.h>
14 : : #include <linux/pagevec.h>
15 : : #include <linux/mempolicy.h>
16 : : #include <linux/syscalls.h>
17 : : #include <linux/sched.h>
18 : : #include <linux/export.h>
19 : : #include <linux/rmap.h>
20 : : #include <linux/mmzone.h>
21 : : #include <linux/hugetlb.h>
22 : : #include <linux/memcontrol.h>
23 : : #include <linux/mm_inline.h>
24 : :
25 : : #include "internal.h"
26 : :
27 : 0 : int can_do_mlock(void)
28 : : {
29 [ + + ]: 203 : if (capable(CAP_IPC_LOCK))
30 : : return 1;
31 [ + + ]: 4 : if (rlimit(RLIMIT_MEMLOCK) != 0)
32 : : return 1;
33 : 2 : return 0;
34 : : }
35 : : EXPORT_SYMBOL(can_do_mlock);
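/*
 * Editor's illustrative sketch (not part of mlock.c): how the policy checked
 * by can_do_mlock() looks from userspace. Without CAP_IPC_LOCK and with
 * RLIMIT_MEMLOCK set to zero, mlock(2) fails with EPERM (see sys_mlock()
 * further down); with a non-zero limit the request is checked against it.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit rl;
	static char buf[4096];

	if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0)
		printf("RLIMIT_MEMLOCK: cur=%llu max=%llu\n",
		       (unsigned long long)rl.rlim_cur,
		       (unsigned long long)rl.rlim_max);

	if (mlock(buf, sizeof(buf)) != 0)
		printf("mlock: %s\n", strerror(errno));	/* EPERM or ENOMEM */
	else
		munlock(buf, sizeof(buf));
	return 0;
}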
36 : :
37 : : /*
38 : : * Mlocked pages are marked with PageMlocked() flag for efficient testing
39 : : * in vmscan and, possibly, the fault path; and to support semi-accurate
40 : : * statistics.
41 : : *
42 : : * An mlocked page [PageMlocked(page)] is unevictable. As such, it will
43 : : * be placed on the LRU "unevictable" list, rather than the [in]active lists.
44 : : * The unevictable list is an LRU sibling list to the [in]active lists.
45 : : * PageUnevictable is set to indicate the unevictable state.
46 : : *
47 : : * When lazy mlocking via vmscan, it is important to ensure that the
48 : : * vma's VM_LOCKED status is not concurrently being modified, otherwise we
49 : : * may have mlocked a page that is being munlocked. So lazy mlock must take
50 : : * the mmap_sem for read, and verify that the vma really is locked
51 : : * (see mm/rmap.c).
52 : : */
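/*
 * Editor's illustrative sketch (not part of mlock.c): the NR_MLOCK counter
 * and the unevictable LRU described above surface in /proc/meminfo as the
 * "Mlocked:" and "Unevictable:" fields; both should grow after mlock(),
 * keeping in mind the statistics are only "semi-accurate" as noted above.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

static void dump_mlock_counters(void)
{
	char line[128];
	FILE *f = fopen("/proc/meminfo", "r");

	if (!f)
		return;
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "Unevictable:", 12) ||
		    !strncmp(line, "Mlocked:", 8))
			fputs(line, stdout);
	fclose(f);
}

int main(void)
{
	static char buf[1 << 20];	/* 1 MiB so the delta is visible */

	dump_mlock_counters();
	if (mlock(buf, sizeof(buf)) == 0) {
		dump_mlock_counters();	/* both counters should increase */
		munlock(buf, sizeof(buf));
	}
	return 0;
}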
53 : :
54 : : /*
55 : : * LRU accounting for clear_page_mlock()
56 : : */
57 : 0 : void clear_page_mlock(struct page *page)
58 : : {
59 [ # # ]: 0 : if (!TestClearPageMlocked(page))
60 : 0 : return;
61 : :
62 : 0 : mod_zone_page_state(page_zone(page), NR_MLOCK,
63 : : -hpage_nr_pages(page));
64 : : count_vm_event(UNEVICTABLE_PGCLEARED);
65 [ # # ]: 0 : if (!isolate_lru_page(page)) {
66 : 0 : putback_lru_page(page);
67 : : } else {
68 : : /*
69 : : * We lost the race. The page already moved to the evictable list.
70 : : */
71 [ # # ]: 0 : if (PageUnevictable(page))
72 : : count_vm_event(UNEVICTABLE_PGSTRANDED);
73 : : }
74 : : }
75 : :
76 : : /*
77 : : * Mark page as mlocked if not already.
78 : : * If page on LRU, isolate and putback to move to unevictable list.
79 : : */
80 : 0 : void mlock_vma_page(struct page *page)
81 : : {
82 [ - + ]: 10406 : BUG_ON(!PageLocked(page));
83 : :
84 [ + + ]: 10406 : if (!TestSetPageMlocked(page)) {
85 : 3782 : mod_zone_page_state(page_zone(page), NR_MLOCK,
86 : : hpage_nr_pages(page));
87 : : count_vm_event(UNEVICTABLE_PGMLOCKED);
88 [ + - ]: 3782 : if (!isolate_lru_page(page))
89 : 3782 : putback_lru_page(page);
90 : : }
91 : 0 : }
92 : :
93 : : /*
94 : : * Finish munlock after successful page isolation
95 : : *
96 : : * Page must be locked. This is a wrapper for try_to_munlock()
97 : : * and putback_lru_page() with munlock accounting.
98 : : */
99 : 0 : static void __munlock_isolated_page(struct page *page)
100 : : {
101 : : int ret = SWAP_AGAIN;
102 : :
103 : : /*
104 : : * Optimization: if the page was mapped just once, that's our mapping
105 : : * and we don't need to check all the other vmas.
106 : : */
107 [ + + ]: 1881 : if (page_mapcount(page) > 1)
108 : 1868 : ret = try_to_munlock(page);
109 : :
110 : : /* Did try_to_munlock() succeed or punt? */
111 [ + - ]: 1881 : if (ret != SWAP_MLOCK)
112 : : count_vm_event(UNEVICTABLE_PGMUNLOCKED);
113 : :
114 : 1881 : putback_lru_page(page);
115 : 1881 : }
116 : :
117 : : /*
118 : : * Accounting for page isolation fail during munlock
119 : : *
120 : : * Performs accounting when page isolation fails in munlock. There is nothing
121 : : * else to do because it means some other task has already removed the page
122 : : * from the LRU. putback_lru_page() will take care of removing the page from
123 : : * the unevictable list, if necessary. vmscan [page_referenced()] will move
124 : : * the page back to the unevictable list if some other vma has it mlocked.
125 : : */
126 : 0 : static void __munlock_isolation_failed(struct page *page)
127 : : {
128 [ # # ]: 0 : if (PageUnevictable(page))
129 : : count_vm_event(UNEVICTABLE_PGSTRANDED);
130 : : else
131 : : count_vm_event(UNEVICTABLE_PGMUNLOCKED);
132 : 0 : }
133 : :
134 : : /**
135 : : * munlock_vma_page - munlock a vma page
136 : : * @page - page to be unlocked, either a normal page or THP page head
137 : : *
138 : : * returns the size of the page as a page mask (0 for normal page,
139 : : * HPAGE_PMD_NR - 1 for THP head page)
140 : : *
141 : : * called from munlock()/munmap() path with page supposedly on the LRU.
142 : : * When we munlock a page, because the vma where we found the page is being
143 : : * munlock()ed or munmap()ed, we want to check whether other vmas hold the
144 : : * page mlocked so that we can leave it on the unevictable lru list and not
145 : : * bother vmscan with it. However, to walk the page's rmap list in
146 : : * try_to_munlock() we must isolate the page from the LRU. If some other
147 : : * task has removed the page from the LRU, we won't be able to do that.
148 : : * So we clear the PageMlocked as we might not get another chance. If we
149 : : * can't isolate the page, we leave it for putback_lru_page() and vmscan
150 : : * [page_referenced()/try_to_unmap()] to deal with.
151 : : */
152 : 0 : unsigned int munlock_vma_page(struct page *page)
153 : : {
154 : : unsigned int nr_pages;
155 : :
156 [ - + ]: 13 : BUG_ON(!PageLocked(page));
157 : :
158 [ + - ]: 13 : if (TestClearPageMlocked(page)) {
159 : : nr_pages = hpage_nr_pages(page);
160 : 13 : mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
161 [ + - ]: 13 : if (!isolate_lru_page(page))
162 : 13 : __munlock_isolated_page(page);
163 : : else
164 : 0 : __munlock_isolation_failed(page);
165 : : } else {
166 : : nr_pages = hpage_nr_pages(page);
167 : : }
168 : :
169 : : /*
170 : : * Regardless of the original PageMlocked flag, we determine nr_pages
171 : : * after touching the flag. This leaves a possible race with a THP page
172 : : * split, such that a whole THP page was munlocked, but nr_pages == 1.
173 : : * Returning a smaller mask due to that is OK, the worst that can
174 : : * happen is subsequent useless scanning of the former tail pages.
175 : : * The NR_MLOCK accounting can however become broken.
176 : : */
177 : 0 : return nr_pages - 1;
178 : : }
179 : :
180 : : /**
181 : : * __mlock_vma_pages_range() - mlock a range of pages in the vma.
182 : : * @vma: target vma
183 : : * @start: start address
184 : : * @end: end address
185 : : *
186 : : * This takes care of making the pages present too.
187 : : *
188 : : * return 0 on success, negative error code on error.
189 : : *
190 : : * vma->vm_mm->mmap_sem must be held for at least read.
191 : : */
192 : 0 : long __mlock_vma_pages_range(struct vm_area_struct *vma,
193 : : unsigned long start, unsigned long end, int *nonblocking)
194 : : {
195 : 1874 : struct mm_struct *mm = vma->vm_mm;
196 : 1874 : unsigned long nr_pages = (end - start) / PAGE_SIZE;
197 : : int gup_flags;
198 : :
199 : : VM_BUG_ON(start & ~PAGE_MASK);
200 : : VM_BUG_ON(end & ~PAGE_MASK);
201 : : VM_BUG_ON(start < vma->vm_start);
202 : : VM_BUG_ON(end > vma->vm_end);
203 : : VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
204 : :
205 : : gup_flags = FOLL_TOUCH | FOLL_MLOCK;
206 : : /*
207 : : * We want to touch writable mappings with a write fault in order
208 : : * to break COW, except for shared mappings because these don't COW
209 : : * and we would not want to dirty them for nothing.
210 : : */
211 [ + + ]: 1874 : if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
212 : : gup_flags |= FOLL_WRITE;
213 : :
214 : : /*
215 : : * We want mlock to succeed for regions that have any permissions
216 : : * other than PROT_NONE.
217 : : */
218 [ + + ]: 1874 : if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
219 : 1860 : gup_flags |= FOLL_FORCE;
220 : :
221 : : /*
222 : : * We made sure addr is within a VMA, so the following will
223 : : * not result in a stack expansion that recurses back here.
224 : : */
225 : 1874 : return __get_user_pages(current, mm, start, nr_pages, gup_flags,
226 : : NULL, NULL, nonblocking);
227 : : }
228 : :
229 : : /*
230 : : * convert get_user_pages() return value to posix mlock() error
231 : : */
232 : : static int __mlock_posix_error_return(long retval)
233 : : {
234 [ - + ]: 10 : if (retval == -EFAULT)
235 : : retval = -ENOMEM;
236 [ # # ]: 0 : else if (retval == -ENOMEM)
237 : : retval = -EAGAIN;
238 : : return retval;
239 : : }
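/*
 * Editor's illustrative sketch (not part of mlock.c): POSIX expects mlock()
 * to report ENOMEM when part of the range has no mapping and EAGAIN when
 * the kernel ran out of memory, which is why gup's -EFAULT is rewritten to
 * -ENOMEM and -ENOMEM to -EAGAIN above (do_mlock() reports gaps between
 * VMAs as -ENOMEM directly). The visible behaviour, roughly:
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	/* Map one page, then try to lock two: the second page is unmapped. */
	void *p = mmap(NULL, page, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	if (mlock(p, 2 * page) != 0)
		printf("mlock: %s\n", strerror(errno));	/* typically ENOMEM */
	munmap(p, page);
	return 0;
}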
240 : :
241 : : /*
242 : : * Prepare page for fast batched LRU putback via __putback_lru_fast()
243 : : *
244 : : * The fast path is available only for evictable pages with a single mapping.
245 : : * Then we can bypass the per-cpu pvec and get better performance.
246 : : * When mapcount > 1 we need try_to_munlock(), which can fail.
247 : : * When !page_evictable(), we need the full redo logic of putback_lru_page
248 : : * to avoid leaving an evictable page on the unevictable list.
249 : : *
250 : : * In case of success, @page is added to @pvec and @pgrescued is incremented
251 : : * in case that the page was previously unevictable. @page is also unlocked.
252 : : */
253 : 0 : static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
254 : : int *pgrescued)
255 : : {
256 : : VM_BUG_ON(PageLRU(page));
257 : : VM_BUG_ON(!PageLocked(page));
258 : :
259 [ + + ][ + - ]: 10047 : if (page_mapcount(page) <= 1 && page_evictable(page)) {
260 : : pagevec_add(pvec, page);
261 [ + - ]: 8179 : if (TestClearPageUnevictable(page))
262 : 8179 : (*pgrescued)++;
263 : 8179 : unlock_page(page);
264 : 8179 : return true;
265 : : }
266 : :
267 : : return false;
268 : : }
269 : :
270 : : /*
271 : : * Putback multiple evictable pages to the LRU
272 : : *
273 : : * Batched putback of evictable pages that bypasses the per-cpu pvec. Some of
274 : : * the pages might have meanwhile become unevictable but that is OK.
275 : : */
276 : 0 : static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
277 : : {
278 : : count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec));
279 : : /*
280 : : * __pagevec_lru_add() calls release_pages() so we don't call
281 : : * put_page() explicitly
282 : : */
283 : 1043 : __pagevec_lru_add(pvec);
284 : : count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
285 : 1043 : }
286 : :
287 : : /*
288 : : * Munlock a batch of pages from the same zone
289 : : *
290 : : * The work is split to two main phases. First phase clears the Mlocked flag
291 : : * and attempts to isolate the pages, all under a single zone lru lock.
292 : : * The second phase finishes the munlock only for pages where isolation
293 : : * succeeded.
294 : : *
295 : : * Note that the pagevec may be modified during the process.
296 : : */
297 : 0 : static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
298 : : {
299 : : int i;
300 : 1087 : int nr = pagevec_count(pvec);
301 : : int delta_munlocked;
302 : : struct pagevec pvec_putback;
303 : 1087 : int pgrescued = 0;
304 : :
305 : : pagevec_init(&pvec_putback, 0);
306 : :
307 : : /* Phase 1: page isolation */
308 : : spin_lock_irq(&zone->lru_lock);
309 [ + + ]: 11134 : for (i = 0; i < nr; i++) {
310 : 10047 : struct page *page = pvec->pages[i];
311 : :
312 [ + - ]: 10047 : if (TestClearPageMlocked(page)) {
313 : : struct lruvec *lruvec;
314 : : int lru;
315 : :
316 [ + - ]: 10047 : if (PageLRU(page)) {
317 : : lruvec = mem_cgroup_page_lruvec(page, zone);
318 : : lru = page_lru(page);
319 : : /*
320 : : * We already have pin from follow_page_mask()
321 : : * so we can spare the get_page() here.
322 : : */
323 : : ClearPageLRU(page);
324 : : del_page_from_lru_list(page, lruvec, lru);
325 : : } else {
326 : 0 : __munlock_isolation_failed(page);
327 : 0 : goto skip_munlock;
328 : : }
329 : :
330 : : } else {
331 : : skip_munlock:
332 : : /*
333 : : * We won't be munlocking this page in the next phase
334 : : * but we still need to release the follow_page_mask()
335 : : * pin. We cannot do it under lru_lock however. If it's
336 : : * the last pin, __page_cache_release would deadlock.
337 : : */
338 : 0 : pagevec_add(&pvec_putback, pvec->pages[i]);
339 : 0 : pvec->pages[i] = NULL;
340 : : }
341 : : }
342 : 1087 : delta_munlocked = -nr + pagevec_count(&pvec_putback);
343 : 1087 : __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
344 : : spin_unlock_irq(&zone->lru_lock);
345 : :
346 : : /* Now we can release pins of pages that we are not munlocking */
347 : : pagevec_release(&pvec_putback);
348 : :
349 : : /* Phase 2: page munlock */
350 [ + + ]: 11134 : for (i = 0; i < nr; i++) {
351 : 10047 : struct page *page = pvec->pages[i];
352 : :
353 [ + - ]: 10047 : if (page) {
354 : : lock_page(page);
355 [ + + ]: 10047 : if (!__putback_lru_fast_prepare(page, &pvec_putback,
356 : : &pgrescued)) {
357 : : /*
358 : : * Slow path. We don't want to lose the last
359 : : * pin before unlock_page()
360 : : */
361 : : get_page(page); /* for putback_lru_page() */
362 : 1868 : __munlock_isolated_page(page);
363 : 1868 : unlock_page(page);
364 : 1868 : put_page(page); /* from follow_page_mask() */
365 : : }
366 : : }
367 : : }
368 : :
369 : : /*
370 : : * Phase 3: page putback for pages that qualified for the fast path
371 : : * This will also call put_page() to return pin from follow_page_mask()
372 : : */
373 [ + + ]: 1087 : if (pagevec_count(&pvec_putback))
374 : 1043 : __putback_lru_fast(&pvec_putback, pgrescued);
375 : 1087 : }
376 : :
377 : : /*
378 : : * Fill up pagevec for __munlock_pagevec using pte walk
379 : : *
380 : : * The function expects that the struct page corresponding to @start address is
381 : : * a non-THP page already pinned and in the @pvec, and that it belongs to @zone.
382 : : *
383 : : * The rest of @pvec is filled by subsequent pages within the same pmd and same
384 : : * zone, as long as the pte's are present and vm_normal_page() succeeds. These
385 : : * pages also get pinned.
386 : : *
387 : : * Returns the address of the next page that should be scanned. This equals
388 : : * @start + PAGE_SIZE when no page could be added by the pte walk.
389 : : */
390 : 0 : static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
391 : : struct vm_area_struct *vma, int zoneid, unsigned long start,
392 : : unsigned long end)
393 : : {
394 : : pte_t *pte;
395 : : spinlock_t *ptl;
396 : :
397 : : /*
398 : : * Initialize pte walk starting at the already pinned page where we
399 : : * are sure that there is a pte, as it was pinned under the same
400 : : * mmap_sem write op.
401 : : */
402 : 1087 : pte = get_locked_pte(vma->vm_mm, start, &ptl);
403 : : /* Make sure we do not cross the page table boundary */
404 [ + + ]: 1087 : end = pgd_addr_end(start, end);
405 : : end = pud_addr_end(start, end);
406 : : end = pmd_addr_end(start, end);
407 : :
408 : : /* The page next to the pinned page is the first we will try to get */
409 : 1087 : start += PAGE_SIZE;
410 [ + + ]: 9429 : while (start < end) {
411 : 9030 : struct page *page = NULL;
412 : 9030 : pte++;
413 [ + - ]: 9030 : if (pte_present(*pte))
414 : 9030 : page = vm_normal_page(vma, start, *pte);
415 : : /*
416 : : * Break if page could not be obtained or the page's node+zone does not
417 : : * match
418 : : */
419 [ + - ][ + + ]: 9030 : if (!page || page_zone_id(page) != zoneid)
420 : : break;
421 : :
422 : : get_page(page);
423 : : /*
424 : : * Increase the address that will be returned *before* the
425 : : * eventual break due to pvec becoming full by adding the page
426 : : */
427 : 8960 : start += PAGE_SIZE;
428 [ + + ]: 8960 : if (pagevec_add(pvec, page) == 0)
429 : : break;
430 : : }
431 : 1087 : pte_unmap_unlock(pte, ptl);
432 : 1087 : return start;
433 : : }
434 : :
435 : : /*
436 : : * munlock_vma_pages_range() - munlock all pages in the vma range.
437 : : * @vma - vma containing range to be munlock()ed.
438 : : * @start - start address in @vma of the range
439 : : * @end - end of range in @vma.
440 : : *
441 : : * For mremap(), munmap() and exit().
442 : : *
443 : : * Called with @vma VM_LOCKED.
444 : : *
445 : : * Returns with VM_LOCKED cleared. Callers must be prepared to
446 : : * deal with this.
447 : : *
448 : : * We don't save and restore VM_LOCKED here because pages are
449 : : * still on lru. In unmap path, pages might be scanned by reclaim
450 : : * and re-mlocked by try_to_{munlock|unmap} before we unmap and
451 : : * free them. This will result in freeing mlocked pages.
452 : : */
453 : 0 : void munlock_vma_pages_range(struct vm_area_struct *vma,
454 : : unsigned long start, unsigned long end)
455 : : {
456 : 398 : vma->vm_flags &= ~VM_LOCKED;
457 : :
458 [ + + ]: 1578 : while (start < end) {
459 : : struct page *page = NULL;
460 : : unsigned int page_mask;
461 : : unsigned long page_increm;
462 : : struct pagevec pvec;
463 : : struct zone *zone;
464 : : int zoneid;
465 : :
466 : : pagevec_init(&pvec, 0);
467 : : /*
468 : : * Although FOLL_DUMP is intended for get_dump_page(),
469 : : * it just so happens that its special treatment of the
470 : : * ZERO_PAGE (returning an error instead of doing get_page)
471 : : * suits munlock very well (and if somehow an abnormal page
472 : : * has sneaked into the range, we won't oops here: great).
473 : : */
474 : 1180 : page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
475 : : &page_mask);
476 : :
477 [ + + ][ + + ]: 1578 : if (page && !IS_ERR(page)) {
478 : : if (PageTransHuge(page)) {
479 : : lock_page(page);
480 : : /*
481 : : * Any THP page found by follow_page_mask() may
482 : : * have gotten split before reaching
483 : : * munlock_vma_page(), so we need to recompute
484 : : * the page_mask here.
485 : : */
486 : : page_mask = munlock_vma_page(page);
487 : : unlock_page(page);
488 : : put_page(page); /* follow_page_mask() */
489 : : } else {
490 : : /*
491 : : * Non-huge pages are handled in batches via
492 : : * pagevec. The pin from follow_page_mask()
493 : : * prevents them from being collapsed into a THP.
494 : : */
495 : : pagevec_add(&pvec, page);
496 : 1087 : zone = page_zone(page);
497 : : zoneid = page_zone_id(page);
498 : :
499 : : /*
500 : : * Try to fill the rest of pagevec using fast
501 : : * pte walk. This will also update start to
502 : : * the next page to process. Then munlock the
503 : : * pagevec.
504 : : */
505 : 1087 : start = __munlock_pagevec_fill(&pvec, vma,
506 : : zoneid, start, end);
507 : 1087 : __munlock_pagevec(&pvec, zone);
508 : 1087 : goto next;
509 : : }
510 : : }
511 : : /* It's a bug to munlock in the middle of a THP page */
512 : : VM_BUG_ON((start >> PAGE_SHIFT) & page_mask);
513 : 93 : page_increm = 1 + page_mask;
514 : 93 : start += page_increm * PAGE_SIZE;
515 : : next:
516 : 1180 : cond_resched();
517 : : }
518 : 398 : }
519 : :
520 : : /*
521 : : * mlock_fixup - handle mlock[all]/munlock[all] requests.
522 : : *
523 : : * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
524 : : * munlock is a no-op. However, for some special vmas, we go ahead and
525 : : * populate the ptes.
526 : : *
527 : : * For vmas that pass the filters, merge/split as appropriate.
528 : : */
529 : 0 : static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
530 : : unsigned long start, unsigned long end, vm_flags_t newflags)
531 : : {
532 : 460 : struct mm_struct *mm = vma->vm_mm;
533 : : pgoff_t pgoff;
534 : : int nr_pages;
535 : : int ret = 0;
536 : 460 : int lock = !!(newflags & VM_LOCKED);
537 : :
538 [ + + ][ + + ]: 460 : if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
539 [ + - ]: 361 : is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm))
540 : : goto out; /* don't set VM_LOCKED, don't count */
541 : :
542 : 361 : pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
543 : 361 : *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
544 : : vma->vm_file, pgoff, vma_policy(vma),
545 : : vma_get_anon_name(vma));
546 [ + + ]: 361 : if (*prev) {
547 : : vma = *prev;
548 : : goto success;
549 : : }
550 : :
551 [ + + ]: 347 : if (start != vma->vm_start) {
552 : 11 : ret = split_vma(mm, vma, start, 1);
553 [ + ]: 11 : if (ret)
554 : : goto out;
555 : : }
556 : :
557 [ + + ]: 807 : if (end != vma->vm_end) {
558 : 4 : ret = split_vma(mm, vma, end, 0);
559 [ + - ]: 4 : if (ret)
560 : : goto out;
561 : : }
562 : :
563 : : success:
564 : : /*
565 : : * Keep track of amount of locked VM.
566 : : */
567 : 361 : nr_pages = (end - start) >> PAGE_SHIFT;
568 [ + + ]: 361 : if (!lock)
569 : 166 : nr_pages = -nr_pages;
570 : 361 : mm->locked_vm += nr_pages;
571 : :
572 : : /*
573 : : * vm_flags is protected by the mmap_sem held in write mode.
574 : : * It's okay if try_to_unmap_one unmaps a page just after we
575 : : * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
576 : : */
577 : :
578 [ + + ]: 361 : if (lock)
579 : 195 : vma->vm_flags = newflags;
580 : : else
581 : 166 : munlock_vma_pages_range(vma, start, end);
582 : :
583 : : out:
584 : 0 : *prev = vma;
585 : 0 : return ret;
586 : : }
587 : :
588 : 0 : static int do_mlock(unsigned long start, size_t len, int on)
589 : : {
590 : : unsigned long nstart, end, tmp;
591 : : struct vm_area_struct * vma, * prev;
592 : : int error;
593 : :
594 : : VM_BUG_ON(start & ~PAGE_MASK);
595 : : VM_BUG_ON(len != PAGE_ALIGN(len));
596 : 400 : end = start + len;
597 [ + - ]: 400 : if (end < start)
598 : : return -EINVAL;
599 [ + + ]: 400 : if (end == start)
600 : : return 0;
601 : 390 : vma = find_vma(current->mm, start);
602 [ + + ][ + + ]: 390 : if (!vma || vma->vm_start > start)
603 : : return -ENOMEM;
604 : :
605 : 359 : prev = vma->vm_prev;
606 [ + + ]: 359 : if (start > vma->vm_start)
607 : 359 : prev = vma;
608 : :
609 : : for (nstart = start ; ; ) {
610 : : vm_flags_t newflags;
611 : :
612 : : /* Here we know that vma->vm_start <= nstart < vma->vm_end. */
613 : :
614 : 359 : newflags = vma->vm_flags & ~VM_LOCKED;
615 [ + + ]: 359 : if (on)
616 : 173 : newflags |= VM_LOCKED;
617 : :
618 : 359 : tmp = vma->vm_end;
619 [ + + ]: 359 : if (tmp > end)
620 : : tmp = end;
621 : 359 : error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
622 [ + - ]: 759 : if (error)
623 : : break;
624 : : nstart = tmp;
625 [ + + ]: 359 : if (nstart < prev->vm_end)
626 : : nstart = prev->vm_end;
627 [ + + ]: 359 : if (nstart >= end)
628 : : break;
629 : :
630 : 1 : vma = prev->vm_next;
631 [ + - ][ - + ]: 1 : if (!vma || vma->vm_start != nstart) {
632 : : error = -ENOMEM;
633 : : break;
634 : : }
635 : : }
636 : 359 : return error;
637 : : }
638 : :
639 : : /*
640 : : * __mm_populate - populate and/or mlock pages within a range of address space.
641 : : *
642 : : * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
643 : : * flags. VMAs must be already marked with the desired vm_flags, and
644 : : * mmap_sem must not be held.
645 : : */
646 : 0 : int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
647 : : {
648 : 404 : struct mm_struct *mm = current->mm;
649 : : unsigned long end, nstart, nend;
650 : : struct vm_area_struct *vma = NULL;
651 : 404 : int locked = 0;
652 : : long ret = 0;
653 : :
654 : : VM_BUG_ON(start & ~PAGE_MASK);
655 : : VM_BUG_ON(len != PAGE_ALIGN(len));
656 : 404 : end = start + len;
657 : :
658 [ + + ]: 2268 : for (nstart = start; nstart < end; nstart = nend) {
659 : : /*
660 : : * We want to fault in pages for [nstart; end) address range.
661 : : * Find first corresponding VMA.
662 : : */
663 [ + + ]: 1877 : if (!locked) {
664 : 1825 : locked = 1;
665 : 1825 : down_read(&mm->mmap_sem);
666 : 1825 : vma = find_vma(mm, nstart);
667 [ + - ]: 52 : } else if (nstart >= vma->vm_end)
668 : 52 : vma = vma->vm_next;
669 [ + + ][ + - ]: 2281 : if (!vma || vma->vm_start >= end)
670 : : break;
671 : : /*
672 : : * Set [nstart; nend) to intersection of desired address
673 : : * range with the first VMA. Also, skip undesirable VMA types.
674 : : */
675 : 1874 : nend = min(end, vma->vm_end);
676 [ - + ]: 1874 : if (vma->vm_flags & (VM_IO | VM_PFNMAP))
677 : 0 : continue;
678 [ + + ]: 1874 : if (nstart < vma->vm_start)
679 : : nstart = vma->vm_start;
680 : : /*
681 : : * Now fault in a range of pages. __mlock_vma_pages_range()
682 : : * double checks the vma flags, so that it won't mlock pages
683 : : * if the vma was already munlocked.
684 : : */
685 : 1874 : ret = __mlock_vma_pages_range(vma, nstart, nend, &locked);
686 [ + + ]: 1874 : if (ret < 0) {
687 [ + + ]: 14 : if (ignore_errors) {
688 : : ret = 0;
689 : 4 : continue; /* continue at next VMA */
690 : : }
691 : : ret = __mlock_posix_error_return(ret);
692 : 10 : break;
693 : : }
694 : 1864 : nend = nstart + ret * PAGE_SIZE;
695 : : ret = 0;
696 : : }
697 [ + - ]: 404 : if (locked)
698 : 404 : up_read(&mm->mmap_sem);
699 : 404 : return ret; /* 0 or negative error code */
700 : : }
701 : :
702 : 0 : SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
703 : : {
704 : : unsigned long locked;
705 : : unsigned long lock_limit;
706 : : int error = -ENOMEM;
707 : :
708 [ + - ]: 194 : if (!can_do_mlock())
709 : : return -EPERM;
710 : :
711 : 194 : lru_add_drain_all(); /* flush pagevec */
712 : :
713 : 194 : down_write(&current->mm->mmap_sem);
714 : 194 : len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
715 : 194 : start &= PAGE_MASK;
716 : :
717 : 194 : locked = len >> PAGE_SHIFT;
718 : 194 : locked += current->mm->locked_vm;
719 : :
720 : : lock_limit = rlimit(RLIMIT_MEMLOCK);
721 : 194 : lock_limit >>= PAGE_SHIFT;
722 : :
723 : : /* check against resource limits */
724 [ + + ][ + - ]: 194 : if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
725 : 194 : error = do_mlock(start, len, 1);
726 : 194 : up_write(&current->mm->mmap_sem);
727 [ + + ]: 194 : if (!error)
728 : 173 : error = __mm_populate(start, len, 0);
729 : : return error;
730 : : }
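/*
 * Editor's illustrative sketch (not part of mlock.c): sys_mlock() above
 * rounds start down and len up to page boundaries
 * (len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK),
 * so locking a single byte pins the whole page containing it, and
 * sys_munlock() below rounds the same way.
 */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	static char byte;			/* lives somewhere inside one page */

	if (mlock(&byte, 1) == 0) {		/* kernel locks the full page */
		printf("1 byte requested, %ld bytes (one page) locked\n", page);
		munlock(&byte, 1);
	}
	return 0;
}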
731 : :
732 : 0 : SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
733 : : {
734 : : int ret;
735 : :
736 : 206 : down_write(&current->mm->mmap_sem);
737 : 206 : len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
738 : 206 : start &= PAGE_MASK;
739 : 206 : ret = do_mlock(start, len, 0);
740 : 206 : up_write(&current->mm->mmap_sem);
741 : : return ret;
742 : : }
743 : :
744 : 0 : static int do_mlockall(int flags)
745 : : {
746 : 6 : struct vm_area_struct * vma, * prev = NULL;
747 : :
748 [ + + ]: 6 : if (flags & MCL_FUTURE)
749 : 3 : current->mm->def_flags |= VM_LOCKED;
750 : : else
751 : 3 : current->mm->def_flags &= ~VM_LOCKED;
752 [ + + ]: 6 : if (flags == MCL_FUTURE)
753 : : goto out;
754 : :
755 [ + + ]: 106 : for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
756 : : vm_flags_t newflags;
757 : :
758 : 101 : newflags = vma->vm_flags & ~VM_LOCKED;
759 [ + + ]: 101 : if (flags & MCL_CURRENT)
760 : 52 : newflags |= VM_LOCKED;
761 : :
762 : : /* Ignore errors */
763 : 101 : mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
764 : 101 : cond_resched();
765 : : }
766 : : out:
767 : 6 : return 0;
768 : : }
769 : :
770 : 0 : SYSCALL_DEFINE1(mlockall, int, flags)
771 : : {
772 : : unsigned long lock_limit;
773 : : int ret = -EINVAL;
774 : :
775 [ + + ][ + + ]: 10 : if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
776 : : goto out;
777 : :
778 : : ret = -EPERM;
779 [ + + ]: 8 : if (!can_do_mlock())
780 : : goto out;
781 : :
782 [ + + ]: 6 : if (flags & MCL_CURRENT)
783 : 5 : lru_add_drain_all(); /* flush pagevec */
784 : :
785 : 6 : down_write(&current->mm->mmap_sem);
786 : :
787 : : lock_limit = rlimit(RLIMIT_MEMLOCK);
788 : 6 : lock_limit >>= PAGE_SHIFT;
789 : :
790 : : ret = -ENOMEM;
791 [ + + ]: 11 : if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
[ + - + + ]
792 : 5 : capable(CAP_IPC_LOCK))
793 : 4 : ret = do_mlockall(flags);
794 : 6 : up_write(&current->mm->mmap_sem);
795 [ + + ][ + + ]: 6 : if (!ret && (flags & MCL_CURRENT))
796 : : mm_populate(0, TASK_SIZE);
797 : : out:
798 : : return ret;
799 : : }
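/*
 * Editor's illustrative sketch (not part of mlock.c): MCL_CURRENT makes
 * sys_mlockall() above walk the existing VMAs via do_mlockall() and then
 * fault everything in through mm_populate(); MCL_FUTURE only sets
 * VM_LOCKED in mm->def_flags so later mappings are locked at creation.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
		/* EPERM without CAP_IPC_LOCK and a zero RLIMIT_MEMLOCK,
		 * ENOMEM when total_vm exceeds the limit */
		printf("mlockall: %s\n", strerror(errno));
		return 1;
	}
	/* ... latency-sensitive work with no major faults ... */
	munlockall();
	return 0;
}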
800 : :
801 : 0 : SYSCALL_DEFINE0(munlockall)
802 : : {
803 : : int ret;
804 : :
805 : 2 : down_write(&current->mm->mmap_sem);
806 : 2 : ret = do_mlockall(0);
807 : 2 : up_write(&current->mm->mmap_sem);
808 : 2 : return ret;
809 : : }
810 : :
811 : : /*
812 : : * Objects with a different lifetime than processes (SHM_LOCK and SHM_HUGETLB
813 : : * shm segments) get accounted against the user_struct instead.
814 : : */
815 : : static DEFINE_SPINLOCK(shmlock_user_lock);
816 : :
817 : 0 : int user_shm_lock(size_t size, struct user_struct *user)
818 : : {
819 : : unsigned long lock_limit, locked;
820 : : int allowed = 0;
821 : :
822 : 2 : locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
823 : : lock_limit = rlimit(RLIMIT_MEMLOCK);
824 [ - + ]: 2 : if (lock_limit == RLIM_INFINITY)
825 : : allowed = 1;
826 : 2 : lock_limit >>= PAGE_SHIFT;
827 : : spin_lock(&shmlock_user_lock);
828 [ + - ][ - + ]: 2 : if (!allowed &&
829 [ # # ]: 0 : locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
830 : : goto out;
831 : : get_uid(user);
832 : 2 : user->locked_shm += locked;
833 : : allowed = 1;
834 : : out:
835 : : spin_unlock(&shmlock_user_lock);
836 : 2 : return allowed;
837 : : }
838 : :
839 : 0 : void user_shm_unlock(size_t size, struct user_struct *user)
840 : : {
841 : : spin_lock(&shmlock_user_lock);
842 : 2 : user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
843 : : spin_unlock(&shmlock_user_lock);
844 : 2 : free_uid(user);
845 : 2 : }
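/*
 * Editor's illustrative sketch (not part of mlock.c): user_shm_lock() and
 * user_shm_unlock() account SHM_LOCK'ed SysV segments against the owning
 * user_struct (user->locked_shm) rather than the calling process, still
 * bounded by RLIMIT_MEMLOCK unless the caller has CAP_IPC_LOCK.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	int id = shmget(IPC_PRIVATE, 1 << 20, IPC_CREAT | 0600);

	if (id < 0)
		return 1;
	/* Charged to the user's locked_shm, limited by RLIMIT_MEMLOCK */
	if (shmctl(id, SHM_LOCK, NULL) != 0)
		printf("SHM_LOCK: %s\n", strerror(errno));
	else
		shmctl(id, SHM_UNLOCK, NULL);
	shmctl(id, IPC_RMID, NULL);
	return 0;
}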