Branch data Line data Source code
1 : : #ifndef _LINUX_PAGEMAP_H
2 : : #define _LINUX_PAGEMAP_H
3 : :
4 : : /*
5 : : * Copyright 1995 Linus Torvalds
6 : : */
7 : : #include <linux/mm.h>
8 : : #include <linux/fs.h>
9 : : #include <linux/list.h>
10 : : #include <linux/highmem.h>
11 : : #include <linux/compiler.h>
12 : : #include <asm/uaccess.h>
13 : : #include <linux/gfp.h>
14 : : #include <linux/bitops.h>
15 : : #include <linux/hardirq.h> /* for in_interrupt() */
16 : : #include <linux/hugetlb_inline.h>
17 : :
18 : : /*
19 : : * Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page
20 : : * allocation mode flags.
21 : : */
22 : : enum mapping_flags {
23 : : AS_EIO = __GFP_BITS_SHIFT + 0, /* IO error on async write */
24 : : AS_ENOSPC = __GFP_BITS_SHIFT + 1, /* ENOSPC on async write */
25 : : AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */
26 : : AS_UNEVICTABLE = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */
27 : : AS_BALLOON_MAP = __GFP_BITS_SHIFT + 4, /* balloon page special map */
28 : : };
29 : :
30 : : static inline void mapping_set_error(struct address_space *mapping, int error)
31 : : {
32 [ - + ][ # # ]: 73877 : if (unlikely(error)) {
33 [ # # ]: 0 : if (error == -ENOSPC)
34 : 0 : set_bit(AS_ENOSPC, &mapping->flags);
35 : : else
36 : 0 : set_bit(AS_EIO, &mapping->flags);
37 : : }
38 : : }
39 : :
40 : : static inline void mapping_set_unevictable(struct address_space *mapping)
41 : : {
42 : 2 : set_bit(AS_UNEVICTABLE, &mapping->flags);
43 : : }
44 : :
45 : : static inline void mapping_clear_unevictable(struct address_space *mapping)
46 : : {
47 : 2 : clear_bit(AS_UNEVICTABLE, &mapping->flags);
48 : : }
49 : :
50 : : static inline int mapping_unevictable(struct address_space *mapping)
51 : : {
52 [ + + ][ + - ]: 171272 : if (mapping)
53 : : return test_bit(AS_UNEVICTABLE, &mapping->flags);
54 : 8784 : return !!mapping;
55 : : }
56 : :
57 : : static inline void mapping_set_balloon(struct address_space *mapping)
58 : : {
59 : : set_bit(AS_BALLOON_MAP, &mapping->flags);
60 : : }
61 : :
62 : : static inline void mapping_clear_balloon(struct address_space *mapping)
63 : : {
64 : : clear_bit(AS_BALLOON_MAP, &mapping->flags);
65 : : }
66 : :
67 : : static inline int mapping_balloon(struct address_space *mapping)
68 : : {
69 : : return mapping && test_bit(AS_BALLOON_MAP, &mapping->flags);
70 : : }
71 : :
72 : : static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
73 : : {
74 : 7686982 : return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
75 : : }
76 : :
77 : : /*
78 : : * This is non-atomic. Only to be used before the mapping is activated.
79 : : * Probably needs a barrier...
80 : : */
81 : : static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
82 : : {
83 : 1975956 : m->flags = (m->flags & ~(__force unsigned long)__GFP_BITS_MASK) |
84 : : (__force unsigned long)mask;
85 : : }
86 : :
87 : : /*
88 : : * The page cache can be done in larger chunks than
89 : : * one page, because it allows for more efficient
90 : : * throughput (it can then be mapped into user
91 : : * space in smaller chunks for the same flexibility).
92 : : *
93 : : * Or rather, it _will_ be done in larger chunks.
94 : : */
95 : : #define PAGE_CACHE_SHIFT PAGE_SHIFT
96 : : #define PAGE_CACHE_SIZE PAGE_SIZE
97 : : #define PAGE_CACHE_MASK PAGE_MASK
98 : : #define PAGE_CACHE_ALIGN(addr) (((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK)
99 : :
100 : : #define page_cache_get(page) get_page(page)
101 : : #define page_cache_release(page) put_page(page)
102 : : void release_pages(struct page **pages, int nr, int cold);
103 : :
104 : : /*
105 : : * Speculatively take a reference to a page.
106 : : * If the page is free (_count == 0), then _count is untouched, and 0
107 : : * is returned. Otherwise, _count is incremented by 1 and 1 is returned.
108 : : *
109 : : * This function must be called inside the same rcu_read_lock() section as has
110 : : * been used to look up the page in the pagecache radix-tree (or page table):
111 : : * this allows allocators to use a synchronize_rcu() to stabilize _count.
112 : : *
113 : : * Unless an RCU grace period has passed, the count of all pages coming out
114 : : * of the allocator must be considered unstable. page_count may return higher
115 : : * than expected, and put_page must be able to do the right thing when the
116 : : * page has been finished with, no matter what it is subsequently allocated
117 : : * for (because put_page is what is used here to drop an invalid speculative
118 : : * reference).
119 : : *
120 : : * This is the interesting part of the lockless pagecache (and lockless
121 : : * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page)
122 : : * has the following pattern:
123 : : * 1. find page in radix tree
124 : : * 2. conditionally increment refcount
125 : : * 3. check the page is still in pagecache (if no, goto 1)
126 : : *
127 : : * Remove-side that cares about stability of _count (eg. reclaim) has the
128 : : * following (with tree_lock held for write):
129 : : * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
130 : : * B. remove page from pagecache
131 : : * C. free the page
132 : : *
133 : : * There are 2 critical interleavings that matter:
134 : : * - 2 runs before A: in this case, A sees elevated refcount and bails out
135 : : * - A runs before 2: in this case, 2 sees zero refcount and retries;
136 : : * subsequently, B will complete and 1 will find no page, causing the
137 : : * lookup to return NULL.
138 : : *
139 : : * It is possible that between 1 and 2, the page is removed then the exact same
140 : : * page is inserted into the same position in pagecache. That's OK: the
141 : : * old find_get_page using tree_lock could equally have run before or after
142 : : * such a re-insertion, depending on the order in which locks are granted.
143 : : *
144 : : * Lookups racing against pagecache insertion aren't a big problem: either 1
145 : : * will find the page or it will not. Likewise, the old find_get_page could run
146 : : * either before the insertion or afterwards, depending on timing.
147 : : */
148 : : static inline int page_cache_get_speculative(struct page *page)
149 : : {
150 : : VM_BUG_ON(in_interrupt());
151 : :
152 : : #ifdef CONFIG_TINY_RCU
153 : : # ifdef CONFIG_PREEMPT_COUNT
154 : : VM_BUG_ON(!in_atomic());
155 : : # endif
156 : : /*
157 : : * Preempt must be disabled here - we rely on rcu_read_lock doing
158 : : * this for us.
159 : : *
160 : : * Pagecache won't be truncated from interrupt context, so if we have
161 : : * found a page in the radix tree here, we have pinned its refcount by
162 : : * disabling preempt, and hence no need for the "speculative get" that
163 : : * SMP requires.
164 : : */
165 : : VM_BUG_ON(page_count(page) == 0);
166 : : atomic_inc(&page->_count);
167 : :
168 : : #else
169 [ + - ][ + - ][ + ][ + ]: 72934400 : if (unlikely(!get_page_unless_zero(page))) {
170 : : /*
171 : : * Either the page has been freed, or will be freed.
172 : : * In either case, retry here and the caller should
173 : : * do the right thing (see comments above).
174 : : */
175 : : return 0;
176 : : }
177 : : #endif
178 : : VM_BUG_ON(PageTail(page));
179 : :
180 : : return 1;
181 : : }
182 : :
183 : : /*
184 : : * Same as above, but add instead of inc (could just be merged)
185 : : */
186 : : static inline int page_cache_add_speculative(struct page *page, int count)
187 : : {
188 : : VM_BUG_ON(in_interrupt());
189 : :
190 : : #if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU)
191 : : # ifdef CONFIG_PREEMPT_COUNT
192 : : VM_BUG_ON(!in_atomic());
193 : : # endif
194 : : VM_BUG_ON(page_count(page) == 0);
195 : : atomic_add(count, &page->_count);
196 : :
197 : : #else
198 : : if (unlikely(!atomic_add_unless(&page->_count, count, 0)))
199 : : return 0;
200 : : #endif
201 : : VM_BUG_ON(PageCompound(page) && page != compound_head(page));
202 : :
203 : : return 1;
204 : : }
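
The lookup-side pattern described in the big comment above (find the page in the radix tree, conditionally take a reference, then re-check that the page is still in the pagecache) is easiest to see as a loop. Below is a minimal sketch, assuming the caller is inside rcu_read_lock()/rcu_read_unlock() and ignoring shadow/exceptional radix-tree entries; lookup_sketch() is an illustrative name, while radix_tree_lookup(), mapping->page_tree and page_cache_release() are existing interfaces of this kernel era.

/*
 * Illustrative sketch (not part of the original header): the three-step
 * lookup loop from the protocol comment above. The caller must hold
 * rcu_read_lock(); shadow/exceptional entries are ignored.
 */
static struct page *lookup_sketch(struct address_space *mapping, pgoff_t offset)
{
	struct page *page;

repeat:
	/* 1. find page in radix tree */
	page = radix_tree_lookup(&mapping->page_tree, offset);
	if (!page)
		return NULL;

	/* 2. conditionally increment refcount */
	if (!page_cache_get_speculative(page))
		goto repeat;

	/* 3. check the page is still in pagecache (if not, go back to 1) */
	if (unlikely(page != radix_tree_lookup(&mapping->page_tree, offset))) {
		page_cache_release(page);
		goto repeat;
	}

	return page;
}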
205 : :
206 : : static inline int page_freeze_refs(struct page *page, int count)
207 : : {
208 : 508627 : return likely(atomic_cmpxchg(&page->_count, count, 0) == count);
209 : : }
210 : :
211 : : static inline void page_unfreeze_refs(struct page *page, int count)
212 : : {
213 : : VM_BUG_ON(page_count(page) != 0);
214 : : VM_BUG_ON(count == 0);
215 : :
216 : 0 : atomic_set(&page->_count, count);
217 : : }
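
page_freeze_refs() and page_unfreeze_refs() implement steps A-C of the remove-side protocol from the comment above, with mapping->tree_lock held for writing. The sketch below is hedged: it follows the rough shape of the reclaim path, relies on __delete_from_page_cache() declared later in this header, and omits swapcache, memcg and ->freepage handling; try_remove_sketch() is an illustrative name.

/*
 * Illustrative sketch (not part of the original header): the remove-side
 * steps A-C, reclaim-style. Swapcache, memcg and ->freepage handling are
 * omitted.
 */
static int try_remove_sketch(struct address_space *mapping, struct page *page)
{
	spin_lock_irq(&mapping->tree_lock);

	/* A. expect 2 references: the pagecache plus the isolating caller */
	if (!page_freeze_refs(page, 2))
		goto cannot_free;

	if (unlikely(PageDirty(page))) {
		/* must not free a dirty page: restore the refcount */
		page_unfreeze_refs(page, 2);
		goto cannot_free;
	}

	__delete_from_page_cache(page);		/* B. remove from pagecache */
	spin_unlock_irq(&mapping->tree_lock);
	return 1;				/* C. caller frees the page */

cannot_free:
	spin_unlock_irq(&mapping->tree_lock);
	return 0;
}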
218 : :
219 : : #ifdef CONFIG_NUMA
220 : : extern struct page *__page_cache_alloc(gfp_t gfp);
221 : : #else
222 : : static inline struct page *__page_cache_alloc(gfp_t gfp)
223 : : {
224 : : return alloc_pages(gfp, 0);
225 : : }
226 : : #endif
227 : :
228 : : static inline struct page *page_cache_alloc(struct address_space *x)
229 : : {
230 : : return __page_cache_alloc(mapping_gfp_mask(x));
231 : : }
232 : :
233 : 28255 : static inline struct page *page_cache_alloc_cold(struct address_space *x)
234 : : {
235 : 28255 : return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
236 : : }
237 : :
238 : 407329 : static inline struct page *page_cache_alloc_readahead(struct address_space *x)
239 : : {
240 : 407329 : return __page_cache_alloc(mapping_gfp_mask(x) |
241 : : __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN);
242 : : }
243 : :
244 : : typedef int filler_t(void *, struct page *);
245 : :
246 : : extern struct page * find_get_page(struct address_space *mapping,
247 : : pgoff_t index);
248 : : extern struct page * find_lock_page(struct address_space *mapping,
249 : : pgoff_t index);
250 : : extern struct page * find_or_create_page(struct address_space *mapping,
251 : : pgoff_t index, gfp_t gfp_mask);
252 : : unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
253 : : unsigned int nr_pages, struct page **pages);
254 : : unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
255 : : unsigned int nr_pages, struct page **pages);
256 : : unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
257 : : int tag, unsigned int nr_pages, struct page **pages);
258 : :
259 : : struct page *grab_cache_page_write_begin(struct address_space *mapping,
260 : : pgoff_t index, unsigned flags);
261 : :
262 : : /*
263 : : * Returns locked page at given index in given cache, creating it if needed.
264 : : */
265 : 0 : static inline struct page *grab_cache_page(struct address_space *mapping,
266 : : pgoff_t index)
267 : : {
268 : 0 : return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
269 : : }
270 : :
271 : : extern struct page * grab_cache_page_nowait(struct address_space *mapping,
272 : : pgoff_t index);
273 : : extern struct page * read_cache_page_async(struct address_space *mapping,
274 : : pgoff_t index, filler_t *filler, void *data);
275 : : extern struct page * read_cache_page(struct address_space *mapping,
276 : : pgoff_t index, filler_t *filler, void *data);
277 : : extern struct page * read_cache_page_gfp(struct address_space *mapping,
278 : : pgoff_t index, gfp_t gfp_mask);
279 : : extern int read_cache_pages(struct address_space *mapping,
280 : : struct list_head *pages, filler_t *filler, void *data);
281 : :
282 : : static inline struct page *read_mapping_page_async(
283 : : struct address_space *mapping,
284 : : pgoff_t index, void *data)
285 : : {
286 : 0 : filler_t *filler = (filler_t *)mapping->a_ops->readpage;
287 : 0 : return read_cache_page_async(mapping, index, filler, data);
288 : : }
289 : :
290 : : static inline struct page *read_mapping_page(struct address_space *mapping,
291 : : pgoff_t index, void *data)
292 : : {
293 : 5464 : filler_t *filler = (filler_t *)mapping->a_ops->readpage;
294 : 5499 : return read_cache_page(mapping, index, filler, data);
295 : : }
296 : :
297 : : /*
298 : : * Return byte-offset into filesystem object for page.
299 : : */
300 : : static inline loff_t page_offset(struct page *page)
301 : : {
302 : 264377 : return ((loff_t)page->index) << PAGE_CACHE_SHIFT;
303 : : }
304 : :
305 : : static inline loff_t page_file_offset(struct page *page)
306 : : {
307 : 0 : return ((loff_t)page_file_index(page)) << PAGE_CACHE_SHIFT;
308 : : }
309 : :
310 : : extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
311 : : unsigned long address);
312 : :
313 : : static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
314 : : unsigned long address)
315 : : {
316 : : pgoff_t pgoff;
317 : : if (unlikely(is_vm_hugetlb_page(vma)))
318 : : return linear_hugepage_index(vma, address);
319 : 26811862 : pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
320 : 26811862 : pgoff += vma->vm_pgoff;
321 : : return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT);
322 : : }
323 : :
324 : : extern void __lock_page(struct page *page);
325 : : extern int __lock_page_killable(struct page *page);
326 : : extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
327 : : unsigned int flags);
328 : : extern void unlock_page(struct page *page);
329 : :
330 : : static inline void __set_page_locked(struct page *page)
331 : : {
332 : : __set_bit(PG_locked, &page->flags);
333 : : }
334 : :
335 : : static inline void __clear_page_locked(struct page *page)
336 : : {
337 : : __clear_bit(PG_locked, &page->flags);
338 : : }
339 : :
340 : : static inline int trylock_page(struct page *page)
341 : : {
342 : 74694647 : return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
343 : : }
344 : :
345 : : /*
346 : : * lock_page may only be called if we have the page's inode pinned.
347 : : */
348 : : static inline void lock_page(struct page *page)
349 : : {
350 : : might_sleep();
351 [ - + + + + + - + - + ]: 14031180 : if (!trylock_page(page))
352 : 276719 : __lock_page(page);
353 : : }
354 : :
355 : : /*
356 : : * lock_page_killable is like lock_page but can be interrupted by fatal
357 : : * signals. It returns 0 if it locked the page and -EINTR if it was
358 : : * killed while waiting.
359 : : */
360 : : static inline int lock_page_killable(struct page *page)
361 : : {
362 : : might_sleep();
363 [ + + # # ]: 29617 : if (!trylock_page(page))
364 : 29247 : return __lock_page_killable(page);
365 : : return 0;
366 : : }
367 : :
368 : : /*
369 : : * lock_page_or_retry - Lock the page, unless this would block and the
370 : : * caller indicated that it can handle a retry.
371 : : */
372 : : static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
373 : : unsigned int flags)
374 : : {
375 : : might_sleep();
376 [ + + ][ + + ]: 40933499 : return trylock_page(page) || __lock_page_or_retry(page, mm, flags);
377 : : }
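
A fault handler that has found a pagecache page typically uses lock_page_or_retry() as sketched below: a zero return means the lock was not taken and the fault should be retried. This mirrors the shape of the filemap fault path; fault_lock_sketch() is an illustrative name and the surrounding error handling is simplified.

/*
 * Illustrative sketch (not part of the original header): typical use of
 * lock_page_or_retry() in a fault handler. Error handling is simplified.
 */
static int fault_lock_sketch(struct page *page, struct mm_struct *mm,
			     unsigned int fault_flags)
{
	if (!lock_page_or_retry(page, mm, fault_flags)) {
		/*
		 * The lock was not taken (mmap_sem may have been dropped
		 * by __lock_page_or_retry()): drop our reference and ask
		 * the caller to retry the fault.
		 */
		page_cache_release(page);
		return VM_FAULT_RETRY;
	}

	/* the page is locked here; a real handler does its work, then: */
	unlock_page(page);
	return 0;
}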
378 : :
379 : : /*
380 : : * This is exported only for wait_on_page_locked/wait_on_page_writeback.
381 : : * Never use this directly!
382 : : */
383 : : extern void wait_on_page_bit(struct page *page, int bit_nr);
384 : :
385 : : extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
386 : :
387 : : static inline int wait_on_page_locked_killable(struct page *page)
388 : : {
389 [ + + ]: 9721 : if (PageLocked(page))
390 : 7961 : return wait_on_page_bit_killable(page, PG_locked);
391 : : return 0;
392 : : }
393 : :
394 : : /*
395 : : * Wait for a page to be unlocked.
396 : : *
397 : : * This must be called with the caller "holding" the page,
398 : : * i.e. with an increased "page->count" so that the page won't
399 : : * go away during the wait.
400 : : */
401 : : static inline void wait_on_page_locked(struct page *page)
402 : : {
403 [ - + ][ # # ][ - + ][ + + ]: 7634 : if (PageLocked(page))
404 : 1416 : wait_on_page_bit(page, PG_locked);
405 : : }
406 : :
407 : : /*
408 : : * Wait for a page to complete writeback
409 : : */
410 : : static inline void wait_on_page_writeback(struct page *page)
411 : : {
412 [ + + ][ + + ][ # # ][ # # ][ + + ][ + ]: 1514046 : if (PageWriteback(page))
413 : 152485 : wait_on_page_bit(page, PG_writeback);
414 : : }
415 : :
416 : : extern void end_page_writeback(struct page *page);
417 : : void wait_for_stable_page(struct page *page);
418 : :
419 : : /*
420 : : * Add an arbitrary waiter to a page's wait queue
421 : : */
422 : : extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter);
423 : :
424 : : /*
425 : : * Fault a userspace page into pagetables. Return non-zero on a fault.
426 : : *
427 : : * This assumes that two userspace pages are always sufficient. That's
428 : : * not true if PAGE_CACHE_SIZE > PAGE_SIZE.
429 : : */
430 : : static inline int fault_in_pages_writeable(char __user *uaddr, int size)
431 : : {
432 : : int ret;
433 : :
434 [ + - ]: 20053413 : if (unlikely(size == 0))
435 : : return 0;
436 : :
437 : : /*
438 : : * Writing zeroes into userspace here is OK, because we know that if
439 : : * the zero gets there, we'll be overwriting it.
440 : : */
441 : 20053433 : ret = __put_user(0, uaddr);
442 [ + # ]: 20052368 : if (ret == 0) {
443 : 20053381 : char __user *end = uaddr + size - 1;
444 : :
445 : : /*
446 : : * If the page was already mapped, this will get a cache miss
447 : : * for sure, so try to avoid doing it.
448 : : */
449 [ + + ]: 20053381 : if (((unsigned long)uaddr & PAGE_MASK) !=
450 : 20053381 : ((unsigned long)end & PAGE_MASK))
451 : 3935819 : ret = __put_user(0, end);
452 : : }
453 : : return ret;
454 : : }
455 : :
456 : : static inline int fault_in_pages_readable(const char __user *uaddr, int size)
457 : : {
458 : : volatile char c;
459 : : int ret;
460 : :
461 [ + + ]: 7814305 : if (unlikely(size == 0))
462 : : return 0;
463 : :
464 : 7814288 : ret = __get_user(c, uaddr);
465 [ + + ]: 7814250 : if (ret == 0) {
466 : 7814199 : const char __user *end = uaddr + size - 1;
467 : :
468 [ + + ]: 7814199 : if (((unsigned long)uaddr & PAGE_MASK) !=
469 : 7814199 : ((unsigned long)end & PAGE_MASK)) {
470 : 3143402 : ret = __get_user(c, end);
471 : 3143412 : (void)c;
472 : : }
473 : : }
474 : : return ret;
475 : : }
476 : :
477 : : /*
478 : : * Multipage variants of the above prefault helpers, useful if more than
479 : : * PAGE_SIZE of data needs to be prefaulted. These are separate from the above
480 : : * functions (which only handle up to PAGE_SIZE) to avoid clobbering the
481 : : * filemap.c hotpaths.
482 : : */
483 : : static inline int fault_in_multipages_writeable(char __user *uaddr, int size)
484 : : {
485 : : int ret = 0;
486 : : char __user *end = uaddr + size - 1;
487 : :
488 : : if (unlikely(size == 0))
489 : : return ret;
490 : :
491 : : /*
492 : : * Writing zeroes into userspace here is OK, because we know that if
493 : : * the zero gets there, we'll be overwriting it.
494 : : */
495 : : while (uaddr <= end) {
496 : : ret = __put_user(0, uaddr);
497 : : if (ret != 0)
498 : : return ret;
499 : : uaddr += PAGE_SIZE;
500 : : }
501 : :
502 : : /* Check whether the range spilled into the next page. */
503 : : if (((unsigned long)uaddr & PAGE_MASK) ==
504 : : ((unsigned long)end & PAGE_MASK))
505 : : ret = __put_user(0, end);
506 : :
507 : : return ret;
508 : : }
509 : :
510 : : static inline int fault_in_multipages_readable(const char __user *uaddr,
511 : : int size)
512 : : {
513 : : volatile char c;
514 : : int ret = 0;
515 : : const char __user *end = uaddr + size - 1;
516 : :
517 : : if (unlikely(size == 0))
518 : : return ret;
519 : :
520 : : while (uaddr <= end) {
521 : : ret = __get_user(c, uaddr);
522 : : if (ret != 0)
523 : : return ret;
524 : : uaddr += PAGE_SIZE;
525 : : }
526 : :
527 : : /* Check whether the range spilled into the next page. */
528 : : if (((unsigned long)uaddr & PAGE_MASK) ==
529 : : ((unsigned long)end & PAGE_MASK)) {
530 : : ret = __get_user(c, end);
531 : : (void)c;
532 : : }
533 : :
534 : : return ret;
535 : : }
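
The prefault helpers above are normally called before user memory is accessed with page faults disabled, e.g. in buffered write paths, so that a fault cannot occur while a pagecache page is locked. Below is a minimal sketch of that retry pattern, assuming bytes fits within the single-page helper's limits; copy_from_user_retry_sketch() is an illustrative name, while fault_in_pages_readable(), pagefault_disable()/pagefault_enable() and __copy_from_user_inatomic() are existing interfaces.

/*
 * Illustrative sketch (not part of the original header): prefault, copy
 * with page faults disabled, retry on a short copy. Returns the number of
 * bytes that could not be copied.
 */
static size_t copy_from_user_retry_sketch(void *dst, const char __user *src,
					  size_t bytes)
{
	size_t left;

	if (unlikely(!bytes))
		return 0;

	do {
		/* touch the source so the atomic copy below can succeed */
		if (fault_in_pages_readable(src, bytes))
			return bytes;

		pagefault_disable();
		left = __copy_from_user_inatomic(dst, src, bytes);
		pagefault_enable();
	} while (left == bytes);	/* nothing copied: prefault and retry */

	return left;
}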
536 : :
537 : : int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
538 : : pgoff_t index, gfp_t gfp_mask);
539 : : int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
540 : : pgoff_t index, gfp_t gfp_mask);
541 : : extern void delete_from_page_cache(struct page *page);
542 : : extern void __delete_from_page_cache(struct page *page);
543 : : int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
544 : :
545 : : /*
546 : : * Like add_to_page_cache_locked, but used to add newly allocated pages:
547 : : * the page is new, so we can just run __set_page_locked() against it.
548 : : */
549 : : static inline int add_to_page_cache(struct page *page,
550 : : struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
551 : : {
552 : : int error;
553 : :
554 : : __set_page_locked(page);
555 : 2176710 : error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
556 [ + + ]: 2176734 : if (unlikely(error))
557 : : __clear_page_locked(page);
558 : : return error;
559 : : }
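
A common way to combine the allocation and insertion helpers in this header is to allocate a page, insert it locked into the pagecache and the LRU, and let the caller fill and unlock it, roughly as the readahead and read_cache_page() paths do. A hedged sketch follows; alloc_and_insert_sketch() is an illustrative name and the gfp handling is simplified.

/*
 * Illustrative sketch (not part of the original header): allocate a page
 * and insert it, locked, into the pagecache and the LRU.
 */
static struct page *alloc_and_insert_sketch(struct address_space *mapping,
					    pgoff_t index)
{
	struct page *page = page_cache_alloc_cold(mapping);
	int error;

	if (!page)
		return NULL;

	error = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
	if (error) {
		/* -EEXIST means someone else inserted a page at this index */
		page_cache_release(page);
		return NULL;
	}

	/*
	 * The page is locked and on the LRU; the caller fills it in,
	 * typically calls SetPageUptodate(), and then unlock_page()s it.
	 */
	return page;
}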
560 : :
561 : : #endif /* _LINUX_PAGEMAP_H */