Branch data Line data Source code
1 : : /*
2 : : * Fast Userspace Mutexes (which I call "Futexes!").
3 : : * (C) Rusty Russell, IBM 2002
4 : : *
5 : : * Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
6 : : * (C) Copyright 2003 Red Hat Inc, All Rights Reserved
7 : : *
8 : : * Removed page pinning, fix privately mapped COW pages and other cleanups
9 : : * (C) Copyright 2003, 2004 Jamie Lokier
10 : : *
11 : : * Robust futex support started by Ingo Molnar
12 : : * (C) Copyright 2006 Red Hat Inc, All Rights Reserved
13 : : * Thanks to Thomas Gleixner for suggestions, analysis and fixes.
14 : : *
15 : : * PI-futex support started by Ingo Molnar and Thomas Gleixner
16 : : * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
17 : : * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
18 : : *
19 : : * PRIVATE futexes by Eric Dumazet
20 : : * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
21 : : *
22 : : * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
23 : : * Copyright (C) IBM Corporation, 2009
24 : : * Thanks to Thomas Gleixner for conceptual design and careful reviews.
25 : : *
26 : : * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
27 : : * enough at me, Linus for the original (flawed) idea, Matthew
28 : : * Kirkwood for proof-of-concept implementation.
29 : : *
30 : : * "The futexes are also cursed."
31 : : * "But they come in a choice of three flavours!"
32 : : *
33 : : * This program is free software; you can redistribute it and/or modify
34 : : * it under the terms of the GNU General Public License as published by
35 : : * the Free Software Foundation; either version 2 of the License, or
36 : : * (at your option) any later version.
37 : : *
38 : : * This program is distributed in the hope that it will be useful,
39 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
40 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41 : : * GNU General Public License for more details.
42 : : *
43 : : * You should have received a copy of the GNU General Public License
44 : : * along with this program; if not, write to the Free Software
45 : : * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
46 : : */
47 : : #include <linux/slab.h>
48 : : #include <linux/poll.h>
49 : : #include <linux/fs.h>
50 : : #include <linux/file.h>
51 : : #include <linux/jhash.h>
52 : : #include <linux/init.h>
53 : : #include <linux/futex.h>
54 : : #include <linux/mount.h>
55 : : #include <linux/pagemap.h>
56 : : #include <linux/syscalls.h>
57 : : #include <linux/signal.h>
58 : : #include <linux/export.h>
59 : : #include <linux/magic.h>
60 : : #include <linux/pid.h>
61 : : #include <linux/nsproxy.h>
62 : : #include <linux/ptrace.h>
63 : : #include <linux/sched/rt.h>
64 : : #include <linux/hugetlb.h>
65 : : #include <linux/freezer.h>
66 : : #include <linux/bootmem.h>
67 : :
68 : : #include <asm/futex.h>
69 : :
70 : : #include "locking/rtmutex_common.h"
71 : :
72 : : /*
73 : : * Basic futex operation and ordering guarantees:
74 : : *
75 : : * The waiter reads the futex value in user space and calls
76 : : * futex_wait(). This function computes the hash bucket and acquires
77 : : * the hash bucket lock. After that it reads the futex user space value
78 : : * again and verifies that the data has not changed. If it has not changed
79 : : * it enqueues itself into the hash bucket, releases the hash bucket lock
80 : : * and schedules.
81 : : *
82 : : * The waker side modifies the user space value of the futex and calls
83 : : * futex_wake(). This function computes the hash bucket and acquires the
84 : : * hash bucket lock. Then it looks for waiters on that futex in the hash
85 : : * bucket and wakes them.
86 : : *
87 : : * In futex wake up scenarios where no tasks are blocked on a futex, taking
88 : : * the hb spinlock can be avoided and the syscall can simply return. For
89 : : * this optimization to work, ordering guarantees must exist so that a
90 : : * waiter being added to the list is observed when the list is concurrently
91 : : * checked by the waker, avoiding scenarios like the following:
92 : : *
93 : : * CPU 0 CPU 1
94 : : * val = *futex;
95 : : * sys_futex(WAIT, futex, val);
96 : : * futex_wait(futex, val);
97 : : * uval = *futex;
98 : : * *futex = newval;
99 : : * sys_futex(WAKE, futex);
100 : : * futex_wake(futex);
101 : : * if (queue_empty())
102 : : * return;
103 : : * if (uval == val)
104 : : * lock(hash_bucket(futex));
105 : : * queue();
106 : : * unlock(hash_bucket(futex));
107 : : * schedule();
108 : : *
109 : : * This would cause the waiter on CPU 0 to wait forever because it
110 : : * missed the transition of the user space value from val to newval
111 : : * and the waker did not find the waiter in the hash bucket queue.
112 : : *
113 : : * The correct serialization ensures that a waiter either observes
114 : : * the changed user space value before blocking or is woken by a
115 : : * concurrent waker:
116 : : *
117 : : * CPU 0 CPU 1
118 : : * val = *futex;
119 : : * sys_futex(WAIT, futex, val);
120 : : * futex_wait(futex, val);
121 : : *
122 : : * waiters++;
123 : : * mb(); (A) <-- paired with -.
124 : : * |
125 : : * lock(hash_bucket(futex)); |
126 : : * |
127 : : * uval = *futex; |
128 : : * | *futex = newval;
129 : : * | sys_futex(WAKE, futex);
130 : : * | futex_wake(futex);
131 : : * |
132 : : * `-------> mb(); (B)
133 : : * if (uval == val)
134 : : * queue();
135 : : * unlock(hash_bucket(futex));
136 : : * schedule(); if (waiters)
137 : : * lock(hash_bucket(futex));
138 : : * wake_waiters(futex);
139 : : * unlock(hash_bucket(futex));
140 : : *
141 : : * Where (A) orders the waiters increment and the futex value read -- this
142 : : * is provided by the full barrier in hb_waiters_inc(); and where (B)
143 : : * orders the write to futex and the waiters read -- this is done by the
144 : : * barriers in get_futex_key_refs(), through either ihold or atomic_inc,
145 : : * depending on the futex type.
146 : : *
147 : : * This yields the following case (where X:=waiters, Y:=futex):
148 : : *
149 : : * X = Y = 0
150 : : *
151 : : * w[X]=1 w[Y]=1
152 : : * MB MB
153 : : * r[Y]=y r[X]=x
154 : : *
155 : : * Which guarantees that x==0 && y==0 is impossible; which translates back into
156 : : * the guarantee that we cannot both miss the futex variable change and the
157 : : * enqueue.
158 : : */
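
An illustrative aside (not part of the kernel tree): the protocol above is exactly what userspace drives through the futex(2) syscall. A minimal wait/wake pair in C, raw syscall and no error handling, all names ours:

	#include <linux/futex.h>
	#include <stdatomic.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static atomic_int fword;

	/* Waiter: sleep only while the word still holds the value we saw.
	 * The kernel re-reads the word under the hash bucket lock, so a
	 * racing change makes FUTEX_WAIT return immediately (EAGAIN). */
	static void wait_on(int val)
	{
		while (atomic_load(&fword) == val)
			syscall(SYS_futex, &fword, FUTEX_WAIT_PRIVATE,
				val, NULL, NULL, 0);
	}

	/* Waker: publish the new value first, then wake one waiter. */
	static void wake_one(int newval)
	{
		atomic_store(&fword, newval);
		syscall(SYS_futex, &fword, FUTEX_WAKE_PRIVATE,
			1, NULL, NULL, 0);
	}
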
159 : :
160 : : int __read_mostly futex_cmpxchg_enabled;
161 : :
162 : : /*
163 : : * Futex flags used to encode options to functions and preserve them across
164 : : * restarts.
165 : : */
166 : : #define FLAGS_SHARED 0x01
167 : : #define FLAGS_CLOCKRT 0x02
168 : : #define FLAGS_HAS_TIMEOUT 0x04
169 : :
170 : : /*
171 : : * Priority Inheritance state:
172 : : */
173 : : struct futex_pi_state {
174 : : /*
175 : : * list of 'owned' pi_state instances - these have to be
176 : : * cleaned up in do_exit() if the task exits prematurely:
177 : : */
178 : : struct list_head list;
179 : :
180 : : /*
181 : : * The PI object:
182 : : */
183 : : struct rt_mutex pi_mutex;
184 : :
185 : : struct task_struct *owner;
186 : : atomic_t refcount;
187 : :
188 : : union futex_key key;
189 : : };
190 : :
191 : : /**
192 : : * struct futex_q - The hashed futex queue entry, one per waiting task
193 : : * @list: priority-sorted list of tasks waiting on this futex
194 : : * @task: the task waiting on the futex
195 : : * @lock_ptr: the hash bucket lock
196 : : * @key: the key the futex is hashed on
197 : : * @pi_state: optional priority inheritance state
198 : : * @rt_waiter: rt_waiter storage for use with requeue_pi
199 : : * @requeue_pi_key: the requeue_pi target futex key
200 : : * @bitset: bitset for the optional bitmasked wakeup
201 : : *
202 : : * We use this hashed waitqueue, instead of a normal wait_queue_t, so
203 : : * we can wake only the relevant ones (hashed queues may be shared).
204 : : *
205 : : * A futex_q has a woken state, just like tasks have TASK_RUNNING.
206 : : * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
207 : : * The order of wakeup is always to make the first condition true, then
208 : : * the second.
209 : : *
210 : : * PI futexes are typically woken before they are removed from the hash list via
211 : : * the rt_mutex code. See unqueue_me_pi().
212 : : */
213 : : struct futex_q {
214 : : struct plist_node list;
215 : :
216 : : struct task_struct *task;
217 : : spinlock_t *lock_ptr;
218 : : union futex_key key;
219 : : struct futex_pi_state *pi_state;
220 : : struct rt_mutex_waiter *rt_waiter;
221 : : union futex_key *requeue_pi_key;
222 : : u32 bitset;
223 : : };
224 : :
225 : : static const struct futex_q futex_q_init = {
226 : : /* list gets initialized in queue_me() */
227 : : .key = FUTEX_KEY_INIT,
228 : : .bitset = FUTEX_BITSET_MATCH_ANY
229 : : };
230 : :
231 : : /*
232 : : * Hash buckets are shared by all the futex_keys that hash to the same
233 : : * location. Each key may have multiple futex_q structures, one for each task
234 : : * waiting on a futex.
235 : : */
236 : : struct futex_hash_bucket {
237 : : atomic_t waiters;
238 : : spinlock_t lock;
239 : : struct plist_head chain;
240 : : } ____cacheline_aligned_in_smp;
241 : :
242 : : static unsigned long __read_mostly futex_hashsize;
243 : :
244 : : static struct futex_hash_bucket *futex_queues;
245 : :
246 : : static inline void futex_get_mm(union futex_key *key)
247 : : {
248 : 3389 : atomic_inc(&key->private.mm->mm_count);
249 : : /*
250 : : * Ensure futex_get_mm() implies a full barrier such that
251 : : * get_futex_key() implies a full barrier. This is relied upon
252 : : * as full barrier (B), see the ordering comment above.
253 : : */
254 : 3389 : smp_mb__after_atomic_inc();
255 : : }
256 : :
257 : : /*
258 : : * Reflects a new waiter being added to the waitqueue.
259 : : */
260 : : static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
261 : : {
262 : : #ifdef CONFIG_SMP
263 : 3980096 : atomic_inc(&hb->waiters);
264 : : /*
265 : : * Full barrier (A), see the ordering comment above.
266 : : */
267 : 3980022 : smp_mb__after_atomic_inc();
268 : : #endif
269 : : }
270 : :
271 : : /*
272 : : * Reflects a waiter being removed from the waitqueue by wakeup
273 : : * paths.
274 : : */
275 : : static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
276 : : {
277 : : #ifdef CONFIG_SMP
278 : 3980228 : atomic_dec(&hb->waiters);
279 : : #endif
280 : : }
281 : :
282 : : static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
283 : : {
284 : : #ifdef CONFIG_SMP
285 : 7726236 : return atomic_read(&hb->waiters);
286 : : #else
287 : : return 1;
288 : : #endif
289 : : }
290 : :
291 : : /*
292 : : * We hash on the keys returned from get_futex_key (see below).
293 : : */
294 : 0 : static struct futex_hash_bucket *hash_futex(union futex_key *key)
295 : : {
296 : 25749482 : u32 hash = jhash2((u32*)&key->both.word,
297 : : (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
298 : 12874741 : key->both.offset);
299 : 12874741 : return &futex_queues[hash & (futex_hashsize - 1)];
300 : : }
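
Editorial note: the mask works because futex_hashsize is a power of two, making "hash & (futex_hashsize - 1)" a cheap equivalent of "hash % futex_hashsize". A self-contained sketch of that equivalence (illustrative, our names):

	#include <assert.h>

	static unsigned long pick_bucket(unsigned long hash, unsigned long hashsize)
	{
		/* mask == modulo only for power-of-two table sizes */
		assert(hashsize && (hashsize & (hashsize - 1)) == 0);
		return hash & (hashsize - 1);	/* same as hash % hashsize */
	}
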
301 : :
302 : : /*
303 : : * Return 1 if two futex_keys are equal, 0 otherwise.
304 : : */
305 : : static inline int match_futex(union futex_key *key1, union futex_key *key2)
306 : : {
307 : 0 : return (key1 && key2
308 [ # # ][ # # ]: 1103468 : && key1->both.word == key2->both.word
[ + + ][ # # ]
[ # # ][ + - ]
[ # # ][ + + ]
[ # # ][ # # ]
309 [ # # ][ # # ]: 1103180 : && key1->both.ptr == key2->both.ptr
[ + - ][ # # ]
[ # # ][ + - ]
[ # # ][ + ]
[ # # ][ # # ]
310 [ # # ][ # # ]: 2206946 : && key1->both.offset == key2->both.offset);
[ # # ][ + - ]
[ - + ][ # # ]
[ # # ][ # # ]
[ # # ][ + - ]
[ - + ][ # # ]
[ # # ][ + + ]
[ + ][ # # ]
[ # # ][ # # ]
[ # # ]
311 : : }
312 : :
313 : : /*
314 : : * Take a reference to the resource addressed by a key.
315 : : * Can be called while holding spinlocks.
316 : : *
317 : : */
318 : 12856454 : static void get_futex_key_refs(union futex_key *key)
319 : : {
320 [ + + ]: 12856454 : if (!key->both.ptr)
321 : 0 : return;
322 : :
323 [ - + + ]: 12853617 : switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
324 : : case FUT_OFF_INODE:
325 : 0 : ihold(key->shared.inode); /* implies MB (B) */
326 : : break;
327 : : case FUT_OFF_MMSHARED:
328 : : futex_get_mm(key); /* implies MB (B) */
329 : : break;
330 : : }
331 : : }
332 : :
333 : : /*
334 : : * Drop a reference to the resource addressed by a key.
335 : : * The hash bucket spinlock must not be held.
336 : : */
337 : 0 : static void drop_futex_key_refs(union futex_key *key)
338 : : {
339 [ - + ]: 12866553 : if (!key->both.ptr) {
340 : : /* If we're here then we tried to put a key we failed to get */
341 [ # # ][ # # ]: 0 : WARN_ON_ONCE(1);
342 : 12866552 : return;
343 : : }
344 : :
345 [ - + + ]: 12866553 : switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
346 : : case FUT_OFF_INODE:
347 : 0 : iput(key->shared.inode);
348 : : break;
349 : : case FUT_OFF_MMSHARED:
350 : : mmdrop(key->private.mm);
351 : : break;
352 : : }
353 : : }
354 : :
355 : : /**
356 : : * get_futex_key() - Get parameters which are the keys for a futex
357 : : * @uaddr: virtual address of the futex
358 : : * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
359 : : * @key: address where result is stored.
360 : : * @rw: mapping needs to be read/write (values: VERIFY_READ,
361 : : * VERIFY_WRITE)
362 : : *
363 : : * Return: a negative error code or 0
364 : : *
365 : : * The key words are stored in *key on success.
366 : : *
367 : : * For shared mappings, it's (page->index, file_inode(vma->vm_file),
368 : : * offset_within_page). For private mappings, it's (uaddr, current->mm).
369 : : * We can usually work out the index without swapping in the page.
370 : : *
371 : : * lock_page() might sleep, the caller should not hold a spinlock.
372 : : */
373 : : static int
374 : 0 : get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
375 : : {
376 : 12812692 : unsigned long address = (unsigned long)uaddr;
377 : 12812692 : struct mm_struct *mm = current->mm;
378 : : struct page *page, *page_head;
379 : : int err, ro = 0;
380 : :
381 : : /*
382 : : * The futex address must be "naturally" aligned.
383 : : */
384 : 12812692 : key->both.offset = address % PAGE_SIZE;
385 [ + ]: 12812692 : if (unlikely((address % sizeof(u32)) != 0))
386 : : return -EINVAL;
387 : 12818437 : address -= key->both.offset;
388 : :
389 [ + ]: 12818437 : if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
390 : : return -EFAULT;
391 : :
392 : : /*
393 : : * PROCESS_PRIVATE futexes are fast.
394 : : * As the mm cannot disappear under us and the 'key' only needs the
395 : : * virtual address, we don't even have to find the underlying vma.
396 : : * Note: we do have to check that 'uaddr' is a valid user address,
397 : : * but access_ok() should be faster than find_vma().
398 : : */
399 [ + + ]: 12821771 : if (!fshared) {
400 : 12818383 : key->private.mm = mm;
401 : 12818383 : key->private.address = address;
402 : 12818383 : get_futex_key_refs(key); /* implies MB (B) */
403 : 12852872 : return 0;
404 : : }
405 : :
406 : : again:
407 : 3388 : err = get_user_pages_fast(address, 1, 1, &page);
408 : : /*
409 : : * If write access is not required (eg. FUTEX_WAIT), try
410 : : * and get read-only access.
411 : : */
412 [ - + ]: 3389 : if (err == -EFAULT && rw == VERIFY_READ) {
413 : 0 : err = get_user_pages_fast(address, 1, 0, &page);
414 : : ro = 1;
415 : : }
416 [ + - ]: 3388 : if (err < 0)
417 : : return err;
418 : : else
419 : : err = 0;
420 : :
421 : : #ifdef CONFIG_TRANSPARENT_HUGEPAGE
422 : : page_head = page;
423 : : if (unlikely(PageTail(page))) {
424 : : put_page(page);
425 : : /* serialize against __split_huge_page_splitting() */
426 : : local_irq_disable();
427 : : if (likely(__get_user_pages_fast(address, 1, !ro, &page) == 1)) {
428 : : page_head = compound_head(page);
429 : : /*
430 : : * page_head is valid pointer but we must pin
431 : : * it before taking the PG_lock and/or
432 : : * PG_compound_lock. The moment we re-enable
433 : : * irqs __split_huge_page_splitting() can
434 : : * return and the head page can be freed from
435 : : * under us. We can't take the PG_lock and/or
436 : : * PG_compound_lock on a page that could be
437 : : * freed from under us.
438 : : */
439 : : if (page != page_head) {
440 : : get_page(page_head);
441 : : put_page(page);
442 : : }
443 : : local_irq_enable();
444 : : } else {
445 : : local_irq_enable();
446 : : goto again;
447 : : }
448 : : }
449 : : #else
450 : 3388 : page_head = compound_head(page);
451 [ - + ]: 3388 : if (page != page_head) {
452 : : get_page(page_head);
453 : 0 : put_page(page);
454 : : }
455 : : #endif
456 : :
457 : : lock_page(page_head);
458 : :
459 : : /*
460 : : * If page_head->mapping is NULL, then it cannot be a PageAnon
461 : : * page; but it might be the ZERO_PAGE or in the gate area or
462 : : * in a special mapping (all cases which we are happy to fail);
463 : : * or it may have been a good file page when get_user_pages_fast
464 : : * found it, but truncated or holepunched or subjected to
465 : : * invalidate_complete_page2 before we got the page lock (also
466 : : * cases which we are happy to fail). And we hold a reference,
467 : : * so refcount care in invalidate_complete_page's remove_mapping
468 : : * prevents drop_caches from setting mapping to NULL beneath us.
469 : : *
470 : : * The case we do have to guard against is when memory pressure made
471 : : * shmem_writepage move it from filecache to swapcache beneath us:
472 : : * an unlikely race, but we do need to retry for page_head->mapping.
473 : : */
474 [ - + ]: 12816081 : if (!page_head->mapping) {
475 : : int shmem_swizzled = PageSwapCache(page_head);
476 : 0 : unlock_page(page_head);
477 : 0 : put_page(page_head);
478 [ # # ]: 0 : if (shmem_swizzled)
479 : : goto again;
480 : : return -EFAULT;
481 : : }
482 : :
483 : : /*
484 : : * Private mappings are handled in a simple way.
485 : : *
486 : : * NOTE: When userspace waits on a MAP_SHARED mapping, even if
487 : : * it's a read-only handle, it's expected that futexes attach to
488 : : * the object not the particular process.
489 : : */
490 [ + - ]: 3389 : if (PageAnon(page_head)) {
491 : : /*
492 : : * A RO anonymous page will never change and thus doesn't make
493 : : * sense for futex operations.
494 : : */
495 [ + - ]: 3389 : if (ro) {
496 : : err = -EFAULT;
497 : : goto out;
498 : : }
499 : :
500 : 3389 : key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
501 : 3389 : key->private.mm = mm;
502 : 3389 : key->private.address = address;
503 : : } else {
504 : 0 : key->both.offset |= FUT_OFF_INODE; /* inode-based key */
505 : 0 : key->shared.inode = page_head->mapping->host;
506 : 0 : key->shared.pgoff = basepage_index(page);
507 : : }
508 : :
509 : 3389 : get_futex_key_refs(key); /* implies MB (B) */
510 : :
511 : : out:
512 : 3389 : unlock_page(page_head);
513 : 3388 : put_page(page_head);
514 : 3389 : return err;
515 : : }
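
For reference, the union futex_key filled in above is defined in <linux/futex.h>; the sketch below reproduces this era's layout from memory (consult the header for the authoritative definition). The "both" member overlays the other two, which is what lets hash_futex() and match_futex() treat private and shared keys uniformly, with FUT_OFF_INODE/FUT_OFF_MMSHARED tagged into the low bits of offset:

	union futex_key {
		struct {
			unsigned long pgoff;
			struct inode *inode;
			int offset;
		} shared;
		struct {
			unsigned long address;
			struct mm_struct *mm;
			int offset;
		} private;
		struct {
			unsigned long word;
			void *ptr;
			int offset;
		} both;
	};
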
516 : :
517 : : static inline void put_futex_key(union futex_key *key)
518 : : {
519 : 11098378 : drop_futex_key_refs(key);
520 : : }
521 : :
522 : : /**
523 : : * fault_in_user_writeable() - Fault in user address and verify RW access
524 : : * @uaddr: pointer to faulting user space address
525 : : *
526 : : * Slow path to fixup the fault we just took in the atomic write
527 : : * access to @uaddr.
528 : : *
529 : : * We have no generic implementation of a non-destructive write to the
530 : : * user address. We know that we faulted in the pagefault-disabled
531 : : * (atomic) section, so we might as well avoid the #PF overhead by
532 : : * calling fixup_user_fault() right away.
533 : : */
534 : 0 : static int fault_in_user_writeable(u32 __user *uaddr)
535 : : {
536 : 0 : struct mm_struct *mm = current->mm;
537 : : int ret;
538 : :
539 : 0 : down_read(&mm->mmap_sem);
540 : 0 : ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
541 : : FAULT_FLAG_WRITE);
542 : 0 : up_read(&mm->mmap_sem);
543 : :
544 : 0 : return ret < 0 ? ret : 0;
545 : : }
546 : :
547 : : /**
548 : : * futex_top_waiter() - Return the highest priority waiter on a futex
549 : : * @hb: the hash bucket the futex_q's reside in
550 : : * @key: the futex key (to distinguish it from other futex futex_q's)
551 : : *
552 : : * Must be called with the hb lock held.
553 : : */
554 : 0 : static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
555 : : union futex_key *key)
556 : : {
557 : : struct futex_q *this;
558 : :
559 [ # # ]: 0 : plist_for_each_entry(this, &hb->chain, list) {
560 [ # # ]: 0 : if (match_futex(&this->key, key))
561 : : return this;
562 : : }
563 : : return NULL;
564 : : }
565 : :
566 : 0 : static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
567 : : u32 uval, u32 newval)
568 : : {
569 : : int ret;
570 : :
571 : : pagefault_disable();
572 : : ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
573 : : pagefault_enable();
574 : :
575 : 0 : return ret;
576 : : }
577 : :
578 : 0 : static int get_futex_value_locked(u32 *dest, u32 __user *from)
579 : : {
580 : : int ret;
581 : :
582 : : pagefault_disable();
583 : 3980118 : ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
584 : : pagefault_enable();
585 : :
586 [ + ]: 3980149 : return ret ? -EFAULT : 0;
587 : : }
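
The pagefault_disable()/pagefault_enable() pair turns a would-be sleeping fault into an immediate -EFAULT, which matters because callers hold a hash bucket spinlock. Every caller then follows the same shape; condensed here from the retry path of futex_requeue() below (a sketch, not literal code):

	ret = get_futex_value_locked(&curval, uaddr1);
	if (unlikely(ret)) {
		double_unlock_hb(hb1, hb2);	/* drop the spinlocks... */
		ret = get_user(curval, uaddr1);	/* ...then fault the page in */
		if (!ret)
			goto retry_private;	/* re-lock and re-read */
	}
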
588 : :
589 : :
590 : : /*
591 : : * PI code:
592 : : */
593 : 0 : static int refill_pi_state_cache(void)
594 : : {
595 : : struct futex_pi_state *pi_state;
596 : :
597 [ # # ]: 0 : if (likely(current->pi_state_cache))
598 : : return 0;
599 : :
600 : : pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
601 : :
602 [ # # ]: 0 : if (!pi_state)
603 : : return -ENOMEM;
604 : :
605 : 0 : INIT_LIST_HEAD(&pi_state->list);
606 : : /* pi_mutex gets initialized later */
607 : 0 : pi_state->owner = NULL;
608 : 0 : atomic_set(&pi_state->refcount, 1);
609 : 0 : pi_state->key = FUTEX_KEY_INIT;
610 : :
611 : 0 : current->pi_state_cache = pi_state;
612 : :
613 : 0 : return 0;
614 : : }
615 : :
616 : 0 : static struct futex_pi_state * alloc_pi_state(void)
617 : : {
618 : 0 : struct futex_pi_state *pi_state = current->pi_state_cache;
619 : :
620 [ # # ]: 0 : WARN_ON(!pi_state);
621 : 0 : current->pi_state_cache = NULL;
622 : :
623 : 0 : return pi_state;
624 : : }
625 : :
626 : 0 : static void free_pi_state(struct futex_pi_state *pi_state)
627 : : {
628 [ # # ]: 0 : if (!atomic_dec_and_test(&pi_state->refcount))
629 : 0 : return;
630 : :
631 : : /*
632 : : * If pi_state->owner is NULL, the owner is most probably dying
633 : : * and has cleaned up the pi_state already
634 : : */
635 [ # # ]: 0 : if (pi_state->owner) {
636 : 0 : raw_spin_lock_irq(&pi_state->owner->pi_lock);
637 : 0 : list_del_init(&pi_state->list);
638 : 0 : raw_spin_unlock_irq(&pi_state->owner->pi_lock);
639 : :
640 : 0 : rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
641 : : }
642 : :
643 [ # # ]: 0 : if (current->pi_state_cache)
644 : 0 : kfree(pi_state);
645 : : else {
646 : : /*
647 : : * pi_state->list is already empty.
648 : : * clear pi_state->owner.
649 : : * refcount is at 0 - put it back to 1.
650 : : */
651 : 0 : pi_state->owner = NULL;
652 : 0 : atomic_set(&pi_state->refcount, 1);
653 : 0 : current->pi_state_cache = pi_state;
654 : : }
655 : : }
656 : :
657 : : /*
658 : : * Look up the task based on what TID userspace gave us.
659 : : * We don't trust it.
660 : : */
661 : 0 : static struct task_struct * futex_find_get_task(pid_t pid)
662 : : {
663 : : struct task_struct *p;
664 : :
665 : : rcu_read_lock();
666 : 0 : p = find_task_by_vpid(pid);
667 [ # # ]: 0 : if (p)
668 : 0 : get_task_struct(p);
669 : :
670 : : rcu_read_unlock();
671 : :
672 : 0 : return p;
673 : : }
674 : :
675 : : /*
676 : : * This task is holding PI mutexes at exit time => bad.
677 : : * Kernel cleans up PI-state, but userspace is likely hosed.
678 : : * (Robust-futex cleanup is separate and might save the day for userspace.)
679 : : */
680 : 0 : void exit_pi_state_list(struct task_struct *curr)
681 : : {
682 : 0 : struct list_head *next, *head = &curr->pi_state_list;
683 : : struct futex_pi_state *pi_state;
684 : : struct futex_hash_bucket *hb;
685 : 0 : union futex_key key = FUTEX_KEY_INIT;
686 : :
687 [ # # ]: 0 : if (!futex_cmpxchg_enabled)
688 : 0 : return;
689 : : /*
690 : : * We are a ZOMBIE and nobody can enqueue itself on
691 : : * pi_state_list anymore, but we have to be careful
692 : : * versus waiters unqueueing themselves:
693 : : */
694 : 0 : raw_spin_lock_irq(&curr->pi_lock);
695 [ # # ]: 0 : while (!list_empty(head)) {
696 : :
697 : : next = head->next;
698 : : pi_state = list_entry(next, struct futex_pi_state, list);
699 : 0 : key = pi_state->key;
700 : 0 : hb = hash_futex(&key);
701 : : raw_spin_unlock_irq(&curr->pi_lock);
702 : :
703 : : spin_lock(&hb->lock);
704 : :
705 : 0 : raw_spin_lock_irq(&curr->pi_lock);
706 : : /*
707 : : * We dropped the pi-lock, so re-check whether this
708 : : * task still owns the PI-state:
709 : : */
710 [ # # ]: 0 : if (head->next != next) {
711 : : spin_unlock(&hb->lock);
712 : 0 : continue;
713 : : }
714 : :
715 [ # # ]: 0 : WARN_ON(pi_state->owner != curr);
716 [ # # ]: 0 : WARN_ON(list_empty(&pi_state->list));
717 : : list_del_init(&pi_state->list);
718 : 0 : pi_state->owner = NULL;
719 : : raw_spin_unlock_irq(&curr->pi_lock);
720 : :
721 : 0 : rt_mutex_unlock(&pi_state->pi_mutex);
722 : :
723 : : spin_unlock(&hb->lock);
724 : :
725 : 0 : raw_spin_lock_irq(&curr->pi_lock);
726 : : }
727 : : raw_spin_unlock_irq(&curr->pi_lock);
728 : : }
729 : :
730 : : static int
731 : 0 : lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
732 : : union futex_key *key, struct futex_pi_state **ps)
733 : : {
734 : : struct futex_pi_state *pi_state = NULL;
735 : : struct futex_q *this, *next;
736 : : struct task_struct *p;
737 : 0 : pid_t pid = uval & FUTEX_TID_MASK;
738 : :
739 [ # # ]: 0 : plist_for_each_entry_safe(this, next, &hb->chain, list) {
740 [ # # ]: 0 : if (match_futex(&this->key, key)) {
741 : : /*
742 : : * Another waiter already exists - bump up
743 : : * the refcount and return its pi_state:
744 : : */
745 : 0 : pi_state = this->pi_state;
746 : : /*
747 : : * Userspace might have messed up non-PI and PI futexes
748 : : */
749 [ # # ]: 0 : if (unlikely(!pi_state))
750 : : return -EINVAL;
751 : :
752 [ # # ]: 0 : WARN_ON(!atomic_read(&pi_state->refcount));
753 : :
754 : : /*
755 : : * When pi_state->owner is NULL then the owner died
756 : : * and another waiter is on the fly. pi_state->owner
757 : : * is fixed up by the task which acquires
758 : : * pi_state->rt_mutex.
759 : : *
760 : : * We do not check for pid == 0 which can happen when
761 : : * the owner died and robust_list_exit() cleared the
762 : : * TID.
763 : : */
764 [ # # ][ # # ]: 0 : if (pid && pi_state->owner) {
765 : : /*
766 : : * Bail out if user space manipulated the
767 : : * futex value.
768 : : */
769 [ # # ]: 0 : if (pid != task_pid_vnr(pi_state->owner))
770 : : return -EINVAL;
771 : : }
772 : :
773 : 0 : atomic_inc(&pi_state->refcount);
774 : 0 : *ps = pi_state;
775 : :
776 : 0 : return 0;
777 : : }
778 : : }
779 : :
780 : : /*
781 : : * We are the first waiter - try to look up the real owner and attach
782 : : * the new pi_state to it, but bail out when TID = 0
783 : : */
784 [ # # ]: 0 : if (!pid)
785 : : return -ESRCH;
786 : 0 : p = futex_find_get_task(pid);
787 [ # # ]: 0 : if (!p)
788 : : return -ESRCH;
789 : :
790 : : /*
791 : : * We need to look at the task state flags to figure out
792 : : * whether the task is exiting. To protect against do_exit()
793 : : * changing the task flags, we do this protected by
794 : : * p->pi_lock:
795 : : */
796 : 0 : raw_spin_lock_irq(&p->pi_lock);
797 [ # # ]: 0 : if (unlikely(p->flags & PF_EXITING)) {
798 : : /*
799 : : * The task is on the way out. When PF_EXITPIDONE is
800 : : * set, we know that the task has finished the
801 : : * cleanup:
802 : : */
803 [ # # ]: 0 : int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
804 : :
805 : : raw_spin_unlock_irq(&p->pi_lock);
806 : : put_task_struct(p);
807 : 0 : return ret;
808 : : }
809 : :
810 : 0 : pi_state = alloc_pi_state();
811 : :
812 : : /*
813 : : * Initialize the pi_mutex in locked state and make 'p'
814 : : * the owner of it:
815 : : */
816 : 0 : rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
817 : :
818 : : /* Store the key for possible exit cleanups: */
819 : 0 : pi_state->key = *key;
820 : :
821 [ # # ]: 0 : WARN_ON(!list_empty(&pi_state->list));
822 : 0 : list_add(&pi_state->list, &p->pi_state_list);
823 : 0 : pi_state->owner = p;
824 : : raw_spin_unlock_irq(&p->pi_lock);
825 : :
826 : : put_task_struct(p);
827 : :
828 : 0 : *ps = pi_state;
829 : :
830 : 0 : return 0;
831 : : }
832 : :
833 : : /**
834 : : * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
835 : : * @uaddr: the pi futex user address
836 : : * @hb: the pi futex hash bucket
837 : : * @key: the futex key associated with uaddr and hb
838 : : * @ps: the pi_state pointer where we store the result of the
839 : : * lookup
840 : : * @task: the task to perform the atomic lock work for. This will
841 : : * be "current" except in the case of requeue pi.
842 : : * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
843 : : *
844 : : * Return:
845 : : * 0 - ready to wait;
846 : : * 1 - acquired the lock;
847 : : * <0 - error
848 : : *
849 : : * The hb->lock and futex_key refs shall be held by the caller.
850 : : */
851 : 0 : static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
852 : : union futex_key *key,
853 : : struct futex_pi_state **ps,
854 : : struct task_struct *task, int set_waiters)
855 : : {
856 : : int lock_taken, ret, force_take = 0;
857 : 0 : u32 uval, newval, curval, vpid = task_pid_vnr(task);
858 : :
859 : : retry:
860 : : ret = lock_taken = 0;
861 : :
862 : : /*
863 : : * To avoid races, we attempt to take the lock here again
864 : : * (by doing a 0 -> TID atomic cmpxchg), while holding all
865 : : * the locks. It will most likely not succeed.
866 : : */
867 : : newval = vpid;
868 [ # # ]: 0 : if (set_waiters)
869 : 0 : newval |= FUTEX_WAITERS;
870 : :
871 [ # # ]: 0 : if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
872 : : return -EFAULT;
873 : :
874 : : /*
875 : : * Detect deadlocks.
876 : : */
877 [ # # ]: 0 : if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
878 : : return -EDEADLK;
879 : :
880 : : /*
881 : : * Surprise - we got the lock. Just return to userspace:
882 : : */
883 [ # # ]: 0 : if (unlikely(!curval))
884 : : return 1;
885 : :
886 : : uval = curval;
887 : :
888 : : /*
889 : : * Set the FUTEX_WAITERS flag, so the owner will know it has someone
890 : : * to wake at the next unlock.
891 : : */
892 : 0 : newval = curval | FUTEX_WAITERS;
893 : :
894 : : /*
895 : : * Should we force take the futex? See below.
896 : : */
897 [ # # ]: 0 : if (unlikely(force_take)) {
898 : : /*
899 : : * Keep the OWNER_DIED and the WAITERS bit and set the
900 : : * new TID value.
901 : : */
902 : 0 : newval = (curval & ~FUTEX_TID_MASK) | vpid;
903 : : force_take = 0;
904 : : lock_taken = 1;
905 : : }
906 : :
907 [ # # ]: 0 : if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
908 : : return -EFAULT;
909 [ # # ]: 0 : if (unlikely(curval != uval))
910 : : goto retry;
911 : :
912 : : /*
913 : : * We took the lock due to forced take over.
914 : : */
915 [ # # ]: 0 : if (unlikely(lock_taken))
916 : : return 1;
917 : :
918 : : /*
919 : : * We dont have the lock. Look up the PI state (or create it if
920 : : * we are the first waiter):
921 : : */
922 : 0 : ret = lookup_pi_state(uval, hb, key, ps);
923 : :
924 [ # # ]: 0 : if (unlikely(ret)) {
925 [ # # ]: 0 : switch (ret) {
926 : : case -ESRCH:
927 : : /*
928 : : * We failed to find an owner for this
929 : : * futex. So we have no pi_state to block
930 : : * on. This can happen in two cases:
931 : : *
932 : : * 1) The owner died
933 : : * 2) A stale FUTEX_WAITERS bit
934 : : *
935 : : * Re-read the futex value.
936 : : */
937 [ # # ]: 0 : if (get_futex_value_locked(&curval, uaddr))
938 : : return -EFAULT;
939 : :
940 : : /*
941 : : * If the owner died or we have a stale
942 : : * WAITERS bit the owner TID in the user space
943 : : * futex is 0.
944 : : */
945 [ # # ]: 0 : if (!(curval & FUTEX_TID_MASK)) {
946 : : force_take = 1;
947 : : goto retry;
948 : : }
949 : : default:
950 : : break;
951 : : }
952 : : }
953 : :
954 : 0 : return ret;
955 : : }
956 : :
957 : : /**
958 : : * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
959 : : * @q: The futex_q to unqueue
960 : : *
961 : : * The q->lock_ptr must not be NULL and must be held by the caller.
962 : : */
963 : 0 : static void __unqueue_futex(struct futex_q *q)
964 : : {
965 : : struct futex_hash_bucket *hb;
966 : :
967 [ + ][ - + ]: 1203086 : if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
968 [ - + ][ + - ]: 1203132 : || WARN_ON(plist_node_empty(&q->list)))
969 : 1203131 : return;
970 : :
971 : 1203084 : hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
972 : 1203084 : plist_del(&q->list, &hb->chain);
973 : : hb_waiters_dec(hb);
974 : : }
975 : :
976 : : /*
977 : : * The hash bucket lock must be held when this is called.
978 : : * Afterwards, the futex_q must not be accessed.
979 : : */
980 : 0 : static void wake_futex(struct futex_q *q)
981 : : {
982 : 1103064 : struct task_struct *p = q->task;
983 : :
984 [ + ][ + + ]: 1103064 : if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
985 : 1103094 : return;
986 : :
987 : : /*
988 : : * We set q->lock_ptr = NULL _before_ we wake up the task. If
989 : : * a non-futex wake up happens on another CPU then the task
990 : : * might exit and p would dereference a non-existing task
991 : : * struct. Prevent this by holding a reference on p across the
992 : : * wake up.
993 : : */
994 : 1103085 : get_task_struct(p);
995 : :
996 : 1103093 : __unqueue_futex(q);
997 : : /*
998 : : * The waiting task can free the futex_q as soon as
999 : : * q->lock_ptr = NULL is written, without taking any locks. A
1000 : : * memory barrier is required here to prevent the following
1001 : : * store to lock_ptr from getting ahead of the plist_del.
1002 : : */
1003 : 1103110 : smp_wmb();
1004 : 1102949 : q->lock_ptr = NULL;
1005 : :
1006 : 1102949 : wake_up_state(p, TASK_NORMAL);
1007 : : put_task_struct(p);
1008 : : }
1009 : :
1010 : 0 : static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
1011 : : {
1012 : : struct task_struct *new_owner;
1013 : 0 : struct futex_pi_state *pi_state = this->pi_state;
1014 : : u32 uninitialized_var(curval), newval;
1015 : :
1016 [ # # ]: 0 : if (!pi_state)
1017 : : return -EINVAL;
1018 : :
1019 : : /*
1020 : : * If current does not own the pi_state then the futex is
1021 : : * inconsistent and user space fiddled with the futex value.
1022 : : */
1023 [ # # ]: 0 : if (pi_state->owner != current)
1024 : : return -EINVAL;
1025 : :
1026 : 0 : raw_spin_lock(&pi_state->pi_mutex.wait_lock);
1027 : 0 : new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
1028 : :
1029 : : /*
1030 : : * It is possible that the next waiter (the one that brought
1031 : : * this owner to the kernel) timed out and is no longer
1032 : : * waiting on the lock.
1033 : : */
1034 [ # # ]: 0 : if (!new_owner)
1035 : 0 : new_owner = this->task;
1036 : :
1037 : : /*
1038 : : * We pass it to the next owner. (The WAITERS bit is always
1039 : : * kept enabled while there is PI state around. We must also
1040 : : * preserve the owner died bit.)
1041 : : */
1042 [ # # ]: 0 : if (!(uval & FUTEX_OWNER_DIED)) {
1043 : : int ret = 0;
1044 : :
1045 : 0 : newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
1046 : :
1047 [ # # ]: 0 : if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
1048 : : ret = -EFAULT;
1049 [ # # ]: 0 : else if (curval != uval)
1050 : : ret = -EINVAL;
1051 [ # # ]: 0 : if (ret) {
1052 : : raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
1053 : : return ret;
1054 : : }
1055 : : }
1056 : :
1057 : 0 : raw_spin_lock_irq(&pi_state->owner->pi_lock);
1058 [ # # ]: 0 : WARN_ON(list_empty(&pi_state->list));
1059 : : list_del_init(&pi_state->list);
1060 : 0 : raw_spin_unlock_irq(&pi_state->owner->pi_lock);
1061 : :
1062 : 0 : raw_spin_lock_irq(&new_owner->pi_lock);
1063 [ # # ]: 0 : WARN_ON(!list_empty(&pi_state->list));
1064 : 0 : list_add(&pi_state->list, &new_owner->pi_state_list);
1065 : 0 : pi_state->owner = new_owner;
1066 : : raw_spin_unlock_irq(&new_owner->pi_lock);
1067 : :
1068 : : raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
1069 : 0 : rt_mutex_unlock(&pi_state->pi_mutex);
1070 : :
1071 : : return 0;
1072 : : }
1073 : :
1074 : 0 : static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
1075 : : {
1076 : : u32 uninitialized_var(oldval);
1077 : :
1078 : : /*
1079 : : * There is no waiter, so we unlock the futex. The owner-died
1080 : : * bit need not be preserved here. We are the owner:
1081 : : */
1082 [ # # ]: 0 : if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
1083 : : return -EFAULT;
1084 [ # # ]: 0 : if (oldval != uval)
1085 : : return -EAGAIN;
1086 : :
1087 : 0 : return 0;
1088 : : }
1089 : :
1090 : : /*
1091 : : * Express the locking dependencies for lockdep:
1092 : : */
1093 : : static inline void
1094 : : double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
1095 : : {
1096 [ + + ][ + + ]: 595147 : if (hb1 <= hb2) {
1097 : : spin_lock(&hb1->lock);
1098 [ + - + - ]: 512326 : if (hb1 < hb2)
1099 : 512326 : spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
1100 : : } else { /* hb1 > hb2 */
1101 : : spin_lock(&hb2->lock);
1102 : 82821 : spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
1103 : : }
1104 : : }
1105 : :
1106 : : static inline void
1107 : : double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
1108 : : {
1109 : : spin_unlock(&hb1->lock);
1110 : 595147 : if (hb1 != hb2)
1111 : : spin_unlock(&hb2->lock);
1112 : : }
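
Comparing lock addresses imposes a single global acquisition order, which rules out ABBA deadlock when two tasks lock the same pair of buckets with the arguments swapped. A userspace analogue of double_lock_hb() (illustrative sketch using pthreads):

	#include <pthread.h>
	#include <stdint.h>

	static void double_lock(pthread_mutex_t *a, pthread_mutex_t *b)
	{
		if (a == b) {
			pthread_mutex_lock(a);
		} else if ((uintptr_t)a < (uintptr_t)b) {
			pthread_mutex_lock(a);	/* lower address first */
			pthread_mutex_lock(b);
		} else {
			pthread_mutex_lock(b);
			pthread_mutex_lock(a);
		}
	}
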
1113 : :
1114 : : /*
1115 : : * Wake up waiters matching bitset queued on this futex (uaddr).
1116 : : */
1117 : : static int
1118 : 0 : futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
1119 : : {
1120 : : struct futex_hash_bucket *hb;
1121 : : struct futex_q *this, *next;
1122 : 7732769 : union futex_key key = FUTEX_KEY_INIT;
1123 : : int ret;
1124 : :
1125 [ + ]: 7732769 : if (!bitset)
1126 : : return -EINVAL;
1127 : :
1128 : 7732834 : ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
1129 [ + ]: 7731935 : if (unlikely(ret != 0))
1130 : : goto out;
1131 : :
1132 : 7731998 : hb = hash_futex(&key);
1133 : :
1134 : : /* Make sure we really have tasks to wakeup */
1135 [ + + ]: 7726236 : if (!hb_waiters_pending(hb))
1136 : : goto out_put_key;
1137 : :
1138 : : spin_lock(&hb->lock);
1139 : :
1140 [ + + ]: 1388661 : plist_for_each_entry_safe(this, next, &hb->chain, list) {
1141 [ + + ]: 508231 : if (match_futex (&this->key, &key)) {
1142 [ + ][ + ]: 507952 : if (this->pi_state || this->rt_waiter) {
1143 : : ret = -EINVAL;
1144 : : break;
1145 : : }
1146 : :
1147 : : /* Check if one of the bits is set in both bitsets */
1148 [ - + ]: 507974 : if (!(this->bitset & bitset))
1149 : 0 : continue;
1150 : :
1151 : 507974 : wake_futex(this);
1152 [ + + ]: 508038 : if (++ret >= nr_wake)
1153 : : break;
1154 : : }
1155 : : }
1156 : :
1157 : : spin_unlock(&hb->lock);
1158 : : out_put_key:
1159 : 7726273 : put_futex_key(&key);
1160 : : out:
1161 : 7680868 : return ret;
1162 : : }
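
The "this->bitset & bitset" test above is what FUTEX_WAIT_BITSET and FUTEX_WAKE_BITSET expose: waiters tag themselves with a mask, and a waker can address a subset. A minimal userspace sketch (illustrative, our names; plain FUTEX_WAKE passes FUTEX_BITSET_MATCH_ANY and so matches every waiter):

	#include <linux/futex.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Wait tagged with bit 0 of the bitset (the val3 argument). */
	static long wait_tagged(int *uaddr, int val)
	{
		return syscall(SYS_futex, uaddr, FUTEX_WAIT_BITSET_PRIVATE,
			       val, NULL, NULL, 0x1);
	}

	/* Wake one waiter whose mask intersects 0x1. */
	static long wake_tagged(int *uaddr)
	{
		return syscall(SYS_futex, uaddr, FUTEX_WAKE_BITSET_PRIVATE,
			       1, NULL, NULL, 0x1);
	}
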
1163 : :
1164 : : /*
1165 : : * Wake up all waiters hashed on the physical page that is mapped
1166 : : * to this virtual address:
1167 : : */
1168 : : static int
1169 : 0 : futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
1170 : : int nr_wake, int nr_wake2, int op)
1171 : : {
1172 : 594978 : union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1173 : : struct futex_hash_bucket *hb1, *hb2;
1174 : : struct futex_q *this, *next;
1175 : : int ret, op_ret;
1176 : :
1177 : : retry:
1178 : 594978 : ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1179 [ + - ]: 594978 : if (unlikely(ret != 0))
1180 : : goto out;
1181 : 594978 : ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
1182 [ + - ]: 594978 : if (unlikely(ret != 0))
1183 : : goto out_put_key1;
1184 : :
1185 : 594978 : hb1 = hash_futex(&key1);
1186 : 594978 : hb2 = hash_futex(&key2);
1187 : :
1188 : : retry_private:
1189 : : double_lock_hb(hb1, hb2);
1190 : : op_ret = futex_atomic_op_inuser(op, uaddr2);
1191 [ - + ]: 594978 : if (unlikely(op_ret < 0)) {
1192 : :
1193 : : double_unlock_hb(hb1, hb2);
1194 : :
1195 : : #ifndef CONFIG_MMU
1196 : : /*
1197 : : * we don't get EFAULT from MMU faults if we don't have an MMU,
1198 : : * but we might get them from range checking
1199 : : */
1200 : : ret = op_ret;
1201 : : goto out_put_keys;
1202 : : #endif
1203 : :
1204 [ # # ]: 0 : if (unlikely(op_ret != -EFAULT)) {
1205 : : ret = op_ret;
1206 : : goto out_put_keys;
1207 : : }
1208 : :
1209 : 0 : ret = fault_in_user_writeable(uaddr2);
1210 [ # # ]: 0 : if (ret)
1211 : : goto out_put_keys;
1212 : :
1213 [ # # ]: 0 : if (!(flags & FLAGS_SHARED))
1214 : : goto retry_private;
1215 : :
1216 : 0 : put_futex_key(&key2);
1217 : 0 : put_futex_key(&key1);
1218 : : goto retry;
1219 : : }
1220 : :
1221 [ + + ]: 594978 : plist_for_each_entry_safe(this, next, &hb1->chain, list) {
1222 [ + - ]: 594911 : if (match_futex (&this->key, &key1)) {
1223 [ + - ][ + - ]: 594911 : if (this->pi_state || this->rt_waiter) {
1224 : : ret = -EINVAL;
1225 : : goto out_unlock;
1226 : : }
1227 : 594911 : wake_futex(this);
1228 [ - + ]: 594911 : if (++ret >= nr_wake)
1229 : : break;
1230 : : }
1231 : : }
1232 : :
1233 [ + + ]: 594978 : if (op_ret > 0) {
1234 : : op_ret = 0;
1235 [ - + ]: 12 : plist_for_each_entry_safe(this, next, &hb2->chain, list) {
1236 [ # # ]: 0 : if (match_futex (&this->key, &key2)) {
1237 [ # # ][ # # ]: 0 : if (this->pi_state || this->rt_waiter) {
1238 : : ret = -EINVAL;
1239 : : goto out_unlock;
1240 : : }
1241 : 0 : wake_futex(this);
1242 [ # # ]: 0 : if (++op_ret >= nr_wake2)
1243 : : break;
1244 : : }
1245 : : }
1246 : 12 : ret += op_ret;
1247 : : }
1248 : :
1249 : : out_unlock:
1250 : : double_unlock_hb(hb1, hb2);
1251 : : out_put_keys:
1252 : 594978 : put_futex_key(&key2);
1253 : : out_put_key1:
1254 : 594978 : put_futex_key(&key1);
1255 : : out:
1256 : 594978 : return ret;
1257 : : }
1258 : :
1259 : : /**
1260 : : * requeue_futex() - Requeue a futex_q from one hb to another
1261 : : * @q: the futex_q to requeue
1262 : : * @hb1: the source hash_bucket
1263 : : * @hb2: the target hash_bucket
1264 : : * @key2: the new key for the requeued futex_q
1265 : : */
1266 : : static inline
1267 : : void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1268 : : struct futex_hash_bucket *hb2, union futex_key *key2)
1269 : : {
1270 : :
1271 : : /*
1272 : : * If key1 and key2 hash to the same bucket, no need to
1273 : : * requeue.
1274 : : */
1275 [ + - ]: 167 : if (likely(&hb1->chain != &hb2->chain)) {
1276 : 167 : plist_del(&q->list, &hb1->chain);
1277 : : hb_waiters_dec(hb1);
1278 : 167 : plist_add(&q->list, &hb2->chain);
1279 : : hb_waiters_inc(hb2);
1280 : 167 : q->lock_ptr = &hb2->lock;
1281 : : }
1282 : 167 : get_futex_key_refs(key2);
1283 : 167 : q->key = *key2;
1284 : : }
1285 : :
1286 : : /**
1287 : : * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
1288 : : * @q: the futex_q
1289 : : * @key: the key of the requeue target futex
1290 : : * @hb: the hash_bucket of the requeue target futex
1291 : : *
1292 : : * During futex_requeue, with requeue_pi=1, it is possible to acquire the
1293 : : * target futex if it is uncontended or via a lock steal. Set the futex_q key
1294 : : * to the requeue target futex so the waiter can detect the wakeup on the right
1295 : : * futex, but remove it from the hb and NULL the rt_waiter so it can detect
1296 : : * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
1297 : : * to protect access to the pi_state to fixup the owner later. Must be called
1298 : : * with both q->lock_ptr and hb->lock held.
1299 : : */
1300 : : static inline
1301 : : void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1302 : : struct futex_hash_bucket *hb)
1303 : : {
1304 : 0 : get_futex_key_refs(key);
1305 : 0 : q->key = *key;
1306 : :
1307 : 0 : __unqueue_futex(q);
1308 : :
1309 [ # # ][ # # ]: 0 : WARN_ON(!q->rt_waiter);
1310 : 0 : q->rt_waiter = NULL;
1311 : :
1312 : 0 : q->lock_ptr = &hb->lock;
1313 : :
1314 : 0 : wake_up_state(q->task, TASK_NORMAL);
1315 : : }
1316 : :
1317 : : /**
1318 : : * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
1319 : : * @pifutex: the user address of the to futex
1320 : : * @hb1: the from futex hash bucket, must be locked by the caller
1321 : : * @hb2: the to futex hash bucket, must be locked by the caller
1322 : : * @key1: the from futex key
1323 : : * @key2: the to futex key
1324 : : * @ps: address to store the pi_state pointer
1325 : : * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
1326 : : *
1327 : : * Try and get the lock on behalf of the top waiter if we can do it atomically.
1328 : : * Wake the top waiter if we succeed. If the caller specified set_waiters,
1329 : : * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
1330 : : * hb1 and hb2 must be held by the caller.
1331 : : *
1332 : : * Return:
1333 : : * 0 - failed to acquire the lock atomically;
1334 : : * 1 - acquired the lock;
1335 : : * <0 - error
1336 : : */
1337 : 0 : static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1338 : : struct futex_hash_bucket *hb1,
1339 : : struct futex_hash_bucket *hb2,
1340 : : union futex_key *key1, union futex_key *key2,
1341 : : struct futex_pi_state **ps, int set_waiters)
1342 : : {
1343 : : struct futex_q *top_waiter = NULL;
1344 : : u32 curval;
1345 : : int ret;
1346 : :
1347 [ # # ]: 0 : if (get_futex_value_locked(&curval, pifutex))
1348 : : return -EFAULT;
1349 : :
1350 : : /*
1351 : : * Find the top_waiter and determine if there are additional waiters.
1352 : : * If the caller intends to requeue more than 1 waiter to pifutex,
1353 : : * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
1354 : : * as we have means to handle the possible fault. If not, don't set
1355 : : * the bit unnecessarily as it will force the subsequent unlock to enter
1356 : : * the kernel.
1357 : : */
1358 : 0 : top_waiter = futex_top_waiter(hb1, key1);
1359 : :
1360 : : /* There are no waiters, nothing for us to do. */
1361 [ # # ]: 0 : if (!top_waiter)
1362 : : return 0;
1363 : :
1364 : : /* Ensure we requeue to the expected futex. */
1365 [ # # ]: 0 : if (!match_futex(top_waiter->requeue_pi_key, key2))
1366 : : return -EINVAL;
1367 : :
1368 : : /*
1369 : : * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in
1370 : : * the contended case or if set_waiters is 1. The pi_state is returned
1371 : : * in ps in contended cases.
1372 : : */
1373 : 0 : ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
1374 : : set_waiters);
1375 [ # # ]: 0 : if (ret == 1)
1376 : : requeue_pi_wake_futex(top_waiter, key2, hb2);
1377 : :
1378 : 0 : return ret;
1379 : : }
1380 : :
1381 : : /**
1382 : : * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
1383 : : * @uaddr1: source futex user address
1384 : : * @flags: futex flags (FLAGS_SHARED, etc.)
1385 : : * @uaddr2: target futex user address
1386 : : * @nr_wake: number of waiters to wake (must be 1 for requeue_pi)
1387 : : * @nr_requeue: number of waiters to requeue (0-INT_MAX)
1388 : : * @cmpval: @uaddr1 expected value (or %NULL)
1389 : : * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
1390 : : * pi futex (pi to pi requeue is not supported)
1391 : : *
1392 : : * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
1393 : : * uaddr2 atomically on behalf of the top waiter.
1394 : : *
1395 : : * Return:
1396 : : * >=0 - on success, the number of tasks requeued or woken;
1397 : : * <0 - on error
1398 : : */
1399 : 0 : static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1400 : : u32 __user *uaddr2, int nr_wake, int nr_requeue,
1401 : : u32 *cmpval, int requeue_pi)
1402 : : {
1403 : 169 : union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1404 : : int drop_count = 0, task_count = 0, ret;
1405 : 169 : struct futex_pi_state *pi_state = NULL;
1406 : : struct futex_hash_bucket *hb1, *hb2;
1407 : : struct futex_q *this, *next;
1408 : : u32 curval2;
1409 : :
1410 [ - + ]: 169 : if (requeue_pi) {
1411 : : /*
1412 : : * requeue_pi requires a pi_state, try to allocate it now
1413 : : * without any locks in case it fails.
1414 : : */
1415 [ # # ]: 0 : if (refill_pi_state_cache())
1416 : : return -ENOMEM;
1417 : : /*
1418 : : * requeue_pi must wake as many tasks as it can, up to nr_wake
1419 : : * + nr_requeue, since it acquires the rt_mutex prior to
1420 : : * returning to userspace, so as to not leave the rt_mutex with
1421 : : * waiters and no owner. However, second and third wake-ups
1422 : : * cannot be predicted as they involve race conditions with the
1423 : : * first wake and a fault while looking up the pi_state. Both
1424 : : * pthread_cond_signal() and pthread_cond_broadcast() should
1425 : : * use nr_wake=1.
1426 : : */
1427 [ # # ]: 169 : if (nr_wake != 1)
1428 : : return -EINVAL;
1429 : : }
1430 : :
1431 : : retry:
1432 [ - + ]: 169 : if (pi_state != NULL) {
1433 : : /*
1434 : : * We will have to lookup the pi_state again, so free this one
1435 : : * to keep the accounting correct.
1436 : : */
1437 : 0 : free_pi_state(pi_state);
1438 : 0 : pi_state = NULL;
1439 : : }
1440 : :
1441 : 169 : ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1442 [ + - ]: 169 : if (unlikely(ret != 0))
1443 : : goto out;
1444 : 169 : ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
1445 : : requeue_pi ? VERIFY_WRITE : VERIFY_READ);
1446 [ + - ]: 169 : if (unlikely(ret != 0))
1447 : : goto out_put_key1;
1448 : :
1449 : 169 : hb1 = hash_futex(&key1);
1450 : 169 : hb2 = hash_futex(&key2);
1451 : :
1452 : : retry_private:
1453 : : double_lock_hb(hb1, hb2);
1454 : :
1455 [ + - ]: 169 : if (likely(cmpval != NULL)) {
1456 : : u32 curval;
1457 : :
1458 : 169 : ret = get_futex_value_locked(&curval, uaddr1);
1459 : :
1460 [ - + ]: 169 : if (unlikely(ret)) {
1461 : : double_unlock_hb(hb1, hb2);
1462 : :
1463 : 0 : ret = get_user(curval, uaddr1);
1464 [ # # ]: 0 : if (ret)
1465 : : goto out_put_keys;
1466 : :
1467 [ # # ]: 0 : if (!(flags & FLAGS_SHARED))
1468 : : goto retry_private;
1469 : :
1470 : : put_futex_key(&key2);
1471 : : put_futex_key(&key1);
1472 : 0 : goto retry;
1473 : : }
1474 [ - + ]: 169 : if (curval != *cmpval) {
1475 : : ret = -EAGAIN;
1476 : 169 : goto out_unlock;
1477 : : }
1478 : : }
1479 : :
1480 [ - + ][ # # ]: 169 : if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
1481 : : /*
1482 : : * Attempt to acquire uaddr2 and wake the top waiter. If we
1483 : : * intend to requeue waiters, force setting the FUTEX_WAITERS
1484 : : * bit. We force this here where we are able to easily handle
1485 : : * faults rather than in the requeue loop below.
1486 : : */
1487 : 0 : ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
1488 : : &key2, &pi_state, nr_requeue);
1489 : :
1490 : : /*
1491 : : * At this point the top_waiter has either taken uaddr2 or is
1492 : : * waiting on it. If the former, then the pi_state will not
1493 : : * exist yet, look it up one more time to ensure we have a
1494 : : * reference to it.
1495 : : */
1496 [ # # ]: 0 : if (ret == 1) {
1497 [ # # ]: 0 : WARN_ON(pi_state);
1498 : 0 : drop_count++;
1499 : 0 : task_count++;
1500 : 0 : ret = get_futex_value_locked(&curval2, uaddr2);
1501 [ # # ]: 0 : if (!ret)
1502 : 0 : ret = lookup_pi_state(curval2, hb2, &key2,
1503 : : &pi_state);
1504 : : }
1505 : :
1506 [ # # # # ]: 0 : switch (ret) {
1507 : : case 0:
1508 : : break;
1509 : : case -EFAULT:
1510 : : double_unlock_hb(hb1, hb2);
1511 : : put_futex_key(&key2);
1512 : : put_futex_key(&key1);
1513 : 0 : ret = fault_in_user_writeable(uaddr2);
1514 [ # # ]: 0 : if (!ret)
1515 : : goto retry;
1516 : : goto out;
1517 : : case -EAGAIN:
1518 : : /* The owner was exiting, try again. */
1519 : : double_unlock_hb(hb1, hb2);
1520 : : put_futex_key(&key2);
1521 : : put_futex_key(&key1);
1522 : 0 : cond_resched();
1523 : 0 : goto retry;
1524 : : default:
1525 : : goto out_unlock;
1526 : : }
1527 : : }
1528 : :
1529 [ + + ]: 505 : plist_for_each_entry_safe(this, next, &hb1->chain, list) {
1530 [ + - ]: 336 : if (task_count - nr_wake >= nr_requeue)
1531 : : break;
1532 : :
1533 [ + + ]: 336 : if (!match_futex(&this->key, &key1))
1534 : 1 : continue;
1535 : :
1536 : : /*
1537 : : * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
1538 : : * be paired with each other and no other futex ops.
1539 : : *
1540 : : * We should never be requeueing a futex_q with a pi_state,
1541 : : * which is awaiting a futex_unlock_pi().
1542 : : */
1543 [ - + ][ # # ]: 335 : if ((requeue_pi && !this->rt_waiter) ||
1544 [ + - ][ + - ]: 335 : (!requeue_pi && this->rt_waiter) ||
1545 : 335 : this->pi_state) {
1546 : : ret = -EINVAL;
1547 : : break;
1548 : : }
1549 : :
1550 : : /*
1551 : : * Wake nr_wake waiters. For requeue_pi, if we acquired the
1552 : : * lock, we already woke the top_waiter. If not, it will be
1553 : : * woken by futex_unlock_pi().
1554 : : */
1555 [ + + ][ + - ]: 335 : if (++task_count <= nr_wake && !requeue_pi) {
1556 : 168 : wake_futex(this);
1557 : 168 : continue;
1558 : : }
1559 : :
1560 : : /* Ensure we requeue to the expected futex for requeue_pi. */
1561 [ - + ][ # # ]: 167 : if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
1562 : : ret = -EINVAL;
1563 : : break;
1564 : : }
1565 : :
1566 : : /*
1567 : : * Requeue nr_requeue waiters and possibly one more in the case
1568 : : * of requeue_pi if we couldn't acquire the lock atomically.
1569 : : */
1570 [ - + ]: 167 : if (requeue_pi) {
1571 : : /* Prepare the waiter to take the rt_mutex. */
1572 : 0 : atomic_inc(&pi_state->refcount);
1573 : 0 : this->pi_state = pi_state;
1574 : 0 : ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
1575 : : this->rt_waiter,
1576 : : this->task, 1);
1577 [ # # ]: 0 : if (ret == 1) {
1578 : : /* We got the lock. */
1579 : : requeue_pi_wake_futex(this, &key2, hb2);
1580 : 0 : drop_count++;
1581 : 0 : continue;
1582 [ # # ]: 0 : } else if (ret) {
1583 : : /* -EDEADLK */
1584 : 0 : this->pi_state = NULL;
1585 : 0 : free_pi_state(pi_state);
1586 : 0 : goto out_unlock;
1587 : : }
1588 : : }
1589 : : requeue_futex(this, hb1, hb2, &key2);
1590 : 167 : drop_count++;
1591 : : }
1592 : :
1593 : : out_unlock:
1594 : : double_unlock_hb(hb1, hb2);
1595 : :
1596 : : /*
1597 : : * drop_futex_key_refs() must be called outside the spinlocks. During
1598 : : * the requeue we moved futex_q's from the hash bucket at key1 to the
1599 : : * one at key2 and updated their key pointer. We no longer need to
1600 : : * hold the references to key1.
1601 : : */
1602 [ + + ]: 336 : while (--drop_count >= 0)
1603 : 167 : drop_futex_key_refs(&key1);
1604 : :
1605 : : out_put_keys:
1606 : : put_futex_key(&key2);
1607 : : out_put_key1:
1608 : : put_futex_key(&key1);
1609 : : out:
1610 [ - + ]: 169 : if (pi_state != NULL)
1611 : 0 : free_pi_state(pi_state);
1612 [ + - ]: 169 : return ret ? ret : task_count;
1613 : : }
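
This is the primitive behind glibc's pthread_cond_broadcast(): wake one waiter and requeue the rest onto the mutex's futex, so they are released one at a time as the mutex is handed off instead of stampeding. A userspace sketch (illustrative; note that nr_requeue travels in the timeout slot and cmpval is the expected value of *cond):

	#include <limits.h>
	#include <linux/futex.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static long broadcast(int *cond, int *mutex, int expected)
	{
		return syscall(SYS_futex, cond, FUTEX_CMP_REQUEUE_PRIVATE,
			       1,			/* nr_wake */
			       (void *)(long)INT_MAX,	/* nr_requeue */
			       mutex, expected);	/* uaddr2, cmpval */
	}
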
1614 : :
1615 : : /* The key must be already stored in q->key. */
1616 : : static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
1617 : : __acquires(&hb->lock)
1618 : : {
1619 : : struct futex_hash_bucket *hb;
1620 : :
1621 : 3979773 : hb = hash_futex(&q->key);
1622 : :
1623 : : /*
1624 : : * Increment the counter before taking the lock so that
1625 : : * a potential waker won't miss a task that is about to sleep and is
1626 : : * waiting for the spinlock. This is safe as all queue_lock()
1627 : : * users end up calling queue_me(). Similarly, for housekeeping,
1628 : : * decrement the counter at queue_unlock() when some error has
1629 : : * occurred and we don't end up adding the task to the list.
1630 : : */
1631 : : hb_waiters_inc(hb);
1632 : :
1633 : 3979603 : q->lock_ptr = &hb->lock;
1634 : :
1635 : : spin_lock(&hb->lock); /* implies MB (A) */
1636 : : return hb;
1637 : : }
1638 : :
1639 : : static inline void
1640 : : queue_unlock(struct futex_hash_bucket *hb)
1641 : : __releases(&hb->lock)
1642 : : {
1643 : : spin_unlock(&hb->lock);
1644 : : hb_waiters_dec(hb);
1645 : : }
1646 : :
1647 : : /**
1648 : : * queue_me() - Enqueue the futex_q on the futex_hash_bucket
1649 : : * @q: The futex_q to enqueue
1650 : : * @hb: The destination hash bucket
1651 : : *
1652 : : * The hb->lock must be held by the caller, and is released here. A call to
1653 : : * queue_me() is typically paired with exactly one call to unqueue_me(). The
1654 : : * exceptions involve the PI related operations, which may use unqueue_me_pi()
1655 : : * or nothing if the unqueue is done as part of the wake process and the unqueue
1656 : :  * state is implicit in the state of the woken task (see futex_wait_requeue_pi() for
1657 : : * an example).
1658 : : */
1659 : : static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1660 : : __releases(&hb->lock)
1661 : : {
1662 : : int prio;
1663 : :
1664 : : /*
1665 : : * The priority used to register this element is
1666 : : * - either the real thread-priority for the real-time threads
1667 : : * (i.e. threads with a priority lower than MAX_RT_PRIO)
1668 : : * - or MAX_RT_PRIO for non-RT threads.
1669 : : * Thus, all RT-threads are woken first in priority order, and
1670 : : * the others are woken last, in FIFO order.
1671 : : */
1672 : 1203080 : prio = min(current->normal_prio, MAX_RT_PRIO);
1673 : :
1674 : : plist_node_init(&q->list, prio);
1675 : 1203080 : plist_add(&q->list, &hb->chain);
1676 : 1203032 : q->task = current;
1677 : : spin_unlock(&hb->lock);
1678 : : }
1679 : :
1680 : : /**
1681 : : * unqueue_me() - Remove the futex_q from its futex_hash_bucket
1682 : : * @q: The futex_q to unqueue
1683 : : *
1684 : : * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
1685 : : * be paired with exactly one earlier call to queue_me().
1686 : : *
1687 : : * Return:
1688 : :  *   1 - if the futex_q was still queued (and we removed it);
1689 : : * 0 - if the futex_q was already removed by the waking thread
1690 : : */
1691 : 1202859 : static int unqueue_me(struct futex_q *q)
1692 : : {
1693 : : spinlock_t *lock_ptr;
1694 : : int ret = 0;
1695 : :
1696 : : /* In the common case we don't take the spinlock, which is nice. */
1697 : : retry:
1698 : 1202861 : lock_ptr = q->lock_ptr;
1699 : 1202861 : barrier();
1700 [ + + ]: 1202867 : if (lock_ptr != NULL) {
1701 : : spin_lock(lock_ptr);
1702 : : /*
1703 : : * q->lock_ptr can change between reading it and
1704 : : * spin_lock(), causing us to take the wrong lock. This
1705 : : * corrects the race condition.
1706 : : *
1707 : : * Reasoning goes like this: if we have the wrong lock,
1708 : : * q->lock_ptr must have changed (maybe several times)
1709 : : * between reading it and the spin_lock(). It can
1710 : : * change again after the spin_lock() but only if it was
1711 : : * already changed before the spin_lock(). It cannot,
1712 : : * however, change back to the original value. Therefore
1713 : : * we can detect whether we acquired the correct lock.
1714 : : */
1715 [ + + ]: 100021 : if (unlikely(lock_ptr != q->lock_ptr)) {
1716 : : spin_unlock(lock_ptr);
1717 : : goto retry;
1718 : : }
1719 : 100019 : __unqueue_futex(q);
1720 : :
1721 [ - + ]: 100019 : BUG_ON(q->pi_state);
1722 : :
1723 : : spin_unlock(lock_ptr);
1724 : : ret = 1;
1725 : : }
1726 : :
1727 : 1202866 : drop_futex_key_refs(&q->key);
1728 : 1202954 : return ret;
1729 : : }
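
The retry logic above is a reusable idiom: load a pointer to a lock,
acquire the lock, then verify that the pointer has not been switched
underneath us. A hedged userspace sketch with pthreads (struct item and
item_lock are hypothetical names):

    /* Load/lock/revalidate idiom, as used by unqueue_me(). */
    #include <pthread.h>
    #include <stddef.h>

    struct item {
            pthread_mutex_t *lock_ptr;      /* NULLed/redirected by a waker */
    };

    static void item_lock(struct item *it)
    {
            pthread_mutex_t *lock;
    retry:
            lock = __atomic_load_n(&it->lock_ptr, __ATOMIC_RELAXED);
            if (!lock)
                    return;         /* already dequeued by the waker */
            pthread_mutex_lock(lock);
            if (lock != __atomic_load_n(&it->lock_ptr, __ATOMIC_RELAXED)) {
                    /* The item moved while we waited on the old lock;
                     * it cannot move back, so one re-check suffices. */
                    pthread_mutex_unlock(lock);
                    goto retry;
            }
            /* We hold the lock that currently protects the item. */
    }
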
1730 : :
1731 : : /*
1732 : :  * PI futexes cannot be requeued and must remove themselves from the
1733 : : * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
1734 : : * and dropped here.
1735 : : */
1736 : 0 : static void unqueue_me_pi(struct futex_q *q)
1737 : : __releases(q->lock_ptr)
1738 : : {
1739 : 0 : __unqueue_futex(q);
1740 : :
1741 [ # # ]: 0 : BUG_ON(!q->pi_state);
1742 : 0 : free_pi_state(q->pi_state);
1743 : 0 : q->pi_state = NULL;
1744 : :
1745 : 0 : spin_unlock(q->lock_ptr);
1746 : 0 : }
1747 : :
1748 : : /*
1749 : : * Fixup the pi_state owner with the new owner.
1750 : : *
1751 : : * Must be called with hash bucket lock held and mm->sem held for non
1752 : : * private futexes.
1753 : : */
1754 : 0 : static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1755 : : struct task_struct *newowner)
1756 : : {
1757 : 0 : u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
1758 : 0 : struct futex_pi_state *pi_state = q->pi_state;
1759 : 0 : struct task_struct *oldowner = pi_state->owner;
1760 : : u32 uval, uninitialized_var(curval), newval;
1761 : : int ret;
1762 : :
1763 : : /* Owner died? */
1764 [ # # ]: 0 : if (!pi_state->owner)
1765 : 0 : newtid |= FUTEX_OWNER_DIED;
1766 : :
1767 : : /*
1768 : : * We are here either because we stole the rtmutex from the
1769 : : * previous highest priority waiter or we are the highest priority
1770 : : * waiter but failed to get the rtmutex the first time.
1771 : : * We have to replace the newowner TID in the user space variable.
1772 : : * This must be atomic as we have to preserve the owner died bit here.
1773 : : *
1774 : : * Note: We write the user space value _before_ changing the pi_state
1775 : : * because we can fault here. Imagine swapped out pages or a fork
1776 : : * that marked all the anonymous memory readonly for cow.
1777 : : *
1778 : : * Modifying pi_state _before_ the user space value would
1779 : : * leave the pi_state in an inconsistent state when we fault
1780 : : * here, because we need to drop the hash bucket lock to
1781 : : * handle the fault. This might be observed in the PID check
1782 : : * in lookup_pi_state.
1783 : : */
1784 : : retry:
1785 [ # # ]: 0 : if (get_futex_value_locked(&uval, uaddr))
1786 : : goto handle_fault;
1787 : :
1788 : : while (1) {
1789 : 0 : newval = (uval & FUTEX_OWNER_DIED) | newtid;
1790 : :
1791 [ # # ]: 0 : if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
1792 : : goto handle_fault;
1793 [ # # ]: 0 : if (curval == uval)
1794 : : break;
1795 : 0 : uval = curval;
1796 : : }
1797 : :
1798 : : /*
1799 : : * We fixed up user space. Now we need to fix the pi_state
1800 : : * itself.
1801 : : */
1802 [ # # ]: 0 : if (pi_state->owner != NULL) {
1803 : 0 : raw_spin_lock_irq(&pi_state->owner->pi_lock);
1804 [ # # ]: 0 : WARN_ON(list_empty(&pi_state->list));
1805 : : list_del_init(&pi_state->list);
1806 : 0 : raw_spin_unlock_irq(&pi_state->owner->pi_lock);
1807 : : }
1808 : :
1809 : 0 : pi_state->owner = newowner;
1810 : :
1811 : 0 : raw_spin_lock_irq(&newowner->pi_lock);
1812 [ # # ]: 0 : WARN_ON(!list_empty(&pi_state->list));
1813 : 0 : list_add(&pi_state->list, &newowner->pi_state_list);
1814 : : raw_spin_unlock_irq(&newowner->pi_lock);
1815 : : return 0;
1816 : :
1817 : : /*
1818 : : * To handle the page fault we need to drop the hash bucket
1819 : : * lock here. That gives the other task (either the highest priority
1820 : : * waiter itself or the task which stole the rtmutex) the
1821 : : * chance to try the fixup of the pi_state. So once we are
1822 : : * back from handling the fault we need to check the pi_state
1823 : : * after reacquiring the hash bucket lock and before trying to
1824 : : * do another fixup. When the fixup has been done already we
1825 : : * simply return.
1826 : : */
1827 : : handle_fault:
1828 : 0 : spin_unlock(q->lock_ptr);
1829 : :
1830 : 0 : ret = fault_in_user_writeable(uaddr);
1831 : :
1832 : 0 : spin_lock(q->lock_ptr);
1833 : :
1834 : : /*
1835 : : * Check if someone else fixed it for us:
1836 : : */
1837 [ # # ]: 0 : if (pi_state->owner != oldowner)
1838 : : return 0;
1839 : :
1840 [ # # ]: 0 : if (ret)
1841 : : return ret;
1842 : :
1843 : : goto retry;
1844 : : }
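
The user space half of this fixup is an ordinary compare-and-exchange
loop. A userspace analogue, assuming C11 atomics (set_new_owner is a
made-up helper; the flag constants are the real ones from <linux/futex.h>):

    /* Rewrite the TID field of a futex word, preserving OWNER_DIED. */
    #include <linux/futex.h>
    #include <stdatomic.h>
    #include <stdint.h>

    static void set_new_owner(_Atomic uint32_t *futex_word, uint32_t newtid)
    {
            uint32_t uval = atomic_load(futex_word);
            uint32_t newval;

            do {
                    /* Keep the owner-died bit, install TID | WAITERS,
                     * exactly like the loop above. */
                    newval = (uval & FUTEX_OWNER_DIED) |
                             FUTEX_WAITERS | newtid;
                    /* On failure uval is refreshed with the current value. */
            } while (!atomic_compare_exchange_weak(futex_word, &uval, newval));
    }
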
1845 : :
1846 : : static long futex_wait_restart(struct restart_block *restart);
1847 : :
1848 : : /**
1849 : : * fixup_owner() - Post lock pi_state and corner case management
1850 : : * @uaddr: user address of the futex
1851 : : * @q: futex_q (contains pi_state and access to the rt_mutex)
1852 : : * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0)
1853 : : *
1854 : : * After attempting to lock an rt_mutex, this function is called to cleanup
1855 : : * the pi_state owner as well as handle race conditions that may allow us to
1856 : : * acquire the lock. Must be called with the hb lock held.
1857 : : *
1858 : : * Return:
1859 : : * 1 - success, lock taken;
1860 : : * 0 - success, lock not taken;
1861 : : * <0 - on error (-EFAULT)
1862 : : */
1863 : 0 : static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
1864 : : {
1865 : : struct task_struct *owner;
1866 : : int ret = 0;
1867 : :
1868 [ # # ]: 0 : if (locked) {
1869 : : /*
1870 : : * Got the lock. We might not be the anticipated owner if we
1871 : : * did a lock-steal - fix up the PI-state in that case:
1872 : : */
1873 [ # # ]: 0 : if (q->pi_state->owner != current)
1874 : 0 : ret = fixup_pi_state_owner(uaddr, q, current);
1875 : : goto out;
1876 : : }
1877 : :
1878 : : /*
1879 : : * Catch the rare case, where the lock was released when we were on the
1880 : : * way back before we locked the hash bucket.
1881 : : */
1882 [ # # ]: 0 : if (q->pi_state->owner == current) {
1883 : : /*
1884 : : * Try to get the rt_mutex now. This might fail as some other
1885 : :                  * task acquired the rt_mutex after we removed ourselves from the
1886 : : * rt_mutex waiters list.
1887 : : */
1888 [ # # ]: 0 : if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
1889 : : locked = 1;
1890 : : goto out;
1891 : : }
1892 : :
1893 : : /*
1894 : : * pi_state is incorrect, some other task did a lock steal and
1895 : : * we returned due to timeout or signal without taking the
1896 : : * rt_mutex. Too late.
1897 : : */
1898 : 0 : raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
1899 : 0 : owner = rt_mutex_owner(&q->pi_state->pi_mutex);
1900 [ # # ]: 0 : if (!owner)
1901 : 0 : owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
1902 : 0 : raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
1903 : 0 : ret = fixup_pi_state_owner(uaddr, q, owner);
1904 : 0 : goto out;
1905 : : }
1906 : :
1907 : : /*
1908 : : * Paranoia check. If we did not take the lock, then we should not be
1909 : : * the owner of the rt_mutex.
1910 : : */
1911 [ # # ]: 0 : if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
1912 : 0 : printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
1913 : : "pi-state %p\n", ret,
1914 : : q->pi_state->pi_mutex.owner,
1915 : : q->pi_state->owner);
1916 : :
1917 : : out:
1918 [ # # ]: 0 : return ret ? ret : locked;
1919 : : }
1920 : :
1921 : : /**
1922 : : * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
1923 : : * @hb: the futex hash bucket, must be locked by the caller
1924 : : * @q: the futex_q to queue up on
1925 : : * @timeout: the prepared hrtimer_sleeper, or null for no timeout
1926 : : */
1927 : 0 : static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
1928 : : struct hrtimer_sleeper *timeout)
1929 : : {
1930 : : /*
1931 : : * The task state is guaranteed to be set before another task can
1932 : : * wake it. set_current_state() is implemented using set_mb() and
1933 : : * queue_me() calls spin_unlock() upon completion, both serializing
1934 : : * access to the hash list and forcing another memory barrier.
1935 : : */
1936 : 1203104 : set_current_state(TASK_INTERRUPTIBLE);
1937 : : queue_me(q, hb);
1938 : :
1939 : : /* Arm the timer */
1940 [ + + ]: 1203082 : if (timeout) {
1941 : 986489 : hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
1942 [ + + ]: 493245 : if (!hrtimer_active(&timeout->timer))
1943 : 34 : timeout->task = NULL;
1944 : : }
1945 : :
1946 : : /*
1947 : : * If we have been removed from the hash list, then another task
1948 : : * has tried to wake us, and we can skip the call to schedule().
1949 : : */
1950 [ + ]: 1203083 : if (likely(!plist_node_empty(&q->list))) {
1951 : : /*
1952 : : * If the timer has already expired, current will already be
1953 : : * flagged for rescheduling. Only call schedule if there
1954 : : * is no timeout, or if it has yet to expire.
1955 : : */
1956 [ + + ][ + + ]: 1203127 : if (!timeout || timeout->task)
1957 : : freezable_schedule();
1958 : : }
1959 : 0 : __set_current_state(TASK_RUNNING);
1960 : 0 : }
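
The ordering this relies on, publishing the task state before the final
condition check so a wakeup cannot slip in between, is the same one the
generic waitqueue API packages up. A sketch (wq and condition are
placeholders, not names from this file):

    DEFINE_WAIT(wait);

    prepare_to_wait(&wq, &wait, TASK_INTERRUPTIBLE);
    if (!condition)                 /* re-check after the state is visible */
            schedule();
    finish_wait(&wq, &wait);
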
1961 : :
1962 : : /**
1963 : : * futex_wait_setup() - Prepare to wait on a futex
1964 : : * @uaddr: the futex userspace address
1965 : : * @val: the expected value
1966 : : * @flags: futex flags (FLAGS_SHARED, etc.)
1967 : : * @q: the associated futex_q
1968 : : * @hb: storage for hash_bucket pointer to be returned to caller
1969 : : *
1970 : : * Setup the futex_q and locate the hash_bucket. Get the futex value and
1971 : : * compare it with the expected value. Handle atomic faults internally.
1972 : : * Return with the hb lock held and a q.key reference on success, and unlocked
1973 : : * with no q.key reference on failure.
1974 : : *
1975 : : * Return:
1976 : : * 0 - uaddr contains val and hb has been locked;
1977 : :  * <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
1978 : : */
1979 : 3979544 : static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
1980 : : struct futex_q *q, struct futex_hash_bucket **hb)
1981 : : {
1982 : : u32 uval;
1983 : : int ret;
1984 : :
1985 : : /*
1986 : : * Access the page AFTER the hash-bucket is locked.
1987 : : * Order is important:
1988 : : *
1989 : : * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
1990 : : * Userspace waker: if (cond(var)) { var = new; futex_wake(&var); }
1991 : : *
1992 : : * The basic logical guarantee of a futex is that it blocks ONLY
1993 : : * if cond(var) is known to be true at the time of blocking, for
1994 : : * any cond. If we locked the hash-bucket after testing *uaddr, that
1995 : : * would open a race condition where we could block indefinitely with
1996 : : * cond(var) false, which would violate the guarantee.
1997 : : *
1998 : : * On the other hand, we insert q and release the hash-bucket only
1999 : : * after testing *uaddr. This guarantees that futex_wait() will NOT
2000 : :          * absorb a wakeup if *uaddr does not match the desired value
2001 : : * while the syscall executes.
2002 : : */
2003 : : retry:
2004 : 3979544 : ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
2005 [ + + ]: 3979843 : if (unlikely(ret != 0))
2006 : : return ret;
2007 : :
2008 : : retry_private:
2009 : 3980020 : *hb = queue_lock(q);
2010 : :
2011 : 3980020 : ret = get_futex_value_locked(&uval, uaddr);
2012 : :
2013 [ - + ]: 3980050 : if (ret) {
2014 : 0 : queue_unlock(*hb);
2015 : :
2016 : 0 : ret = get_user(uval, uaddr);
2017 [ # # ]: 0 : if (ret)
2018 : : goto out;
2019 : :
2020 [ # # ]: 0 : if (!(flags & FLAGS_SHARED))
2021 : : goto retry_private;
2022 : :
2023 : : put_futex_key(&q->key);
2024 : : goto retry;
2025 : : }
2026 : :
2027 [ + + ]: 3980050 : if (uval != val) {
2028 : 2776958 : queue_unlock(*hb);
2029 : : ret = -EWOULDBLOCK;
2030 : : }
2031 : :
2032 : : out:
2033 [ + + ]: 7959559 : if (ret)
2034 : : put_futex_key(&q->key);
2035 : 3980015 : return ret;
2036 : : }
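
A runnable userspace illustration of the waiter/waker protocol sketched in
the comment above, a one-shot event flag (raw syscall usage; error handling
elided):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <limits.h>
    #include <stdatomic.h>
    #include <stdint.h>

    static _Atomic uint32_t event;  /* 0 = not signalled, 1 = signalled */

    static void event_wait(void)
    {
            /* The kernel re-reads the word under the bucket lock, so a
             * signal between our load and the syscall simply returns
             * EWOULDBLOCK and we re-test. */
            while (atomic_load(&event) == 0)
                    syscall(SYS_futex, &event, FUTEX_WAIT, 0, NULL, NULL, 0);
    }

    static void event_signal(void)
    {
            atomic_store(&event, 1);
            syscall(SYS_futex, &event, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
    }
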
2037 : :
2038 : 0 : static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
2039 : : ktime_t *abs_time, u32 bitset)
2040 : : {
2041 : : struct hrtimer_sleeper timeout, *to = NULL;
2042 : : struct restart_block *restart;
2043 : : struct futex_hash_bucket *hb;
2044 : 3979418 : struct futex_q q = futex_q_init;
2045 : : int ret;
2046 : :
2047 [ + ]: 3979418 : if (!bitset)
2048 : : return -EINVAL;
2049 : 3979820 : q.bitset = bitset;
2050 : :
2051 [ + + ]: 3979820 : if (abs_time) {
2052 : : to = &timeout;
2053 : :
2054 : 493259 : hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2055 : : CLOCK_REALTIME : CLOCK_MONOTONIC,
2056 : : HRTIMER_MODE_ABS);
2057 : 493259 : hrtimer_init_sleeper(to, current);
2058 : 493259 : hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2059 : 493259 : current->timer_slack_ns);
2060 : : }
2061 : :
2062 : : retry:
2063 : : /*
2064 : : * Prepare to wait on uaddr. On success, holds hb lock and increments
2065 : : * q.key refs.
2066 : : */
2067 : 170 : ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
2068 [ + + ]: 3979995 : if (ret)
2069 : : goto out;
2070 : :
2071 : : /* queue_me and wait for wakeup, timeout, or a signal. */
2072 : 1203069 : futex_wait_queue_me(hb, &q, to);
2073 : :
2074 : : /* If we were woken (and unqueued), we succeeded, whatever. */
2075 : : ret = 0;
2076 : : /* unqueue_me() drops q.key ref */
2077 [ + + ]: 1202731 : if (!unqueue_me(&q))
2078 : : goto out;
2079 : : ret = -ETIMEDOUT;
2080 [ + + ][ + ]: 100019 : if (to && !to->task)
2081 : : goto out;
2082 : :
2083 : : /*
2084 : : * We expect signal_pending(current), but we might be the
2085 : : * victim of a spurious wakeup as well.
2086 : : */
2087 [ + ]: 0 : if (!signal_pending(current))
2088 : : goto retry;
2089 : :
2090 : : ret = -ERESTARTSYS;
2091 [ + + ]: 16 : if (!abs_time)
2092 : : goto out;
2093 : :
2094 : :        restart = &current_thread_info()->restart_block;
2095 : 5 : restart->fn = futex_wait_restart;
2096 : 5 : restart->futex.uaddr = uaddr;
2097 : 5 : restart->futex.val = val;
2098 : 5 : restart->futex.time = abs_time->tv64;
2099 : 5 : restart->futex.bitset = bitset;
2100 : 5 : restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
2101 : :
2102 : : ret = -ERESTART_RESTARTBLOCK;
2103 : :
2104 : : out:
2105 [ + + ]: 3979911 : if (to) {
2106 : 493259 : hrtimer_cancel(&to->timer);
2107 : : destroy_hrtimer_on_stack(&to->timer);
2108 : : }
2109 : 3979911 : return ret;
2110 : : }
2111 : :
2112 : :
2113 : 0 : static long futex_wait_restart(struct restart_block *restart)
2114 : : {
2115 : 0 : u32 __user *uaddr = restart->futex.uaddr;
2116 : : ktime_t t, *tp = NULL;
2117 : :
2118 [ # # ]: 0 : if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
2119 : 0 : t.tv64 = restart->futex.time;
2120 : : tp = &t;
2121 : : }
2122 : 0 : restart->fn = do_no_restart_syscall;
2123 : :
2124 : 0 : return (long)futex_wait(uaddr, restart->futex.flags,
2125 : : restart->futex.val, tp, restart->futex.bitset);
2126 : : }
2127 : :
2128 : :
2129 : : /*
2130 : : * Userspace tried a 0 -> TID atomic transition of the futex value
2131 : : * and failed. The kernel side here does the whole locking operation:
2132 : : * if there are waiters then it will block, it does PI, etc. (Due to
2133 : : * races the kernel might see a 0 value of the futex too.)
2134 : : */
2135 : 0 : static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
2136 : : ktime_t *time, int trylock)
2137 : : {
2138 : : struct hrtimer_sleeper timeout, *to = NULL;
2139 : : struct futex_hash_bucket *hb;
2140 : 0 : struct futex_q q = futex_q_init;
2141 : : int res, ret;
2142 : :
2143 [ # # ]: 0 : if (refill_pi_state_cache())
2144 : : return -ENOMEM;
2145 : :
2146 [ # # ]: 0 : if (time) {
2147 : : to = &timeout;
2148 : : hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
2149 : : HRTIMER_MODE_ABS);
2150 : 0 : hrtimer_init_sleeper(to, current);
2151 : : hrtimer_set_expires(&to->timer, *time);
2152 : : }
2153 : :
2154 : : retry:
2155 : 0 : ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
2156 [ # # ]: 0 : if (unlikely(ret != 0))
2157 : : goto out;
2158 : :
2159 : : retry_private:
2160 : : hb = queue_lock(&q);
2161 : :
2162 : 0 : ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
2163 [ # # ]: 0 : if (unlikely(ret)) {
2164 [ # # # # ]: 0 : switch (ret) {
2165 : : case 1:
2166 : : /* We got the lock. */
2167 : : ret = 0;
2168 : : goto out_unlock_put_key;
2169 : : case -EFAULT:
2170 : : goto uaddr_faulted;
2171 : : case -EAGAIN:
2172 : : /*
2173 : : * Task is exiting and we just wait for the
2174 : : * exit to complete.
2175 : : */
2176 : : queue_unlock(hb);
2177 : : put_futex_key(&q.key);
2178 : 0 : cond_resched();
2179 : : goto retry;
2180 : : default:
2181 : : goto out_unlock_put_key;
2182 : : }
2183 : : }
2184 : :
2185 : : /*
2186 : : * Only actually queue now that the atomic ops are done:
2187 : : */
2188 : : queue_me(&q, hb);
2189 : :
2190 [ # # ]: 0 : WARN_ON(!q.pi_state);
2191 : : /*
2192 : : * Block on the PI mutex:
2193 : : */
2194 [ # # ]: 0 : if (!trylock)
2195 : 0 : ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
2196 : : else {
2197 : 0 : ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
2198 : : /* Fixup the trylock return value: */
2199 [ # # ]: 0 : ret = ret ? 0 : -EWOULDBLOCK;
2200 : : }
2201 : :
2202 : 0 : spin_lock(q.lock_ptr);
2203 : : /*
2204 : : * Fixup the pi_state owner and possibly acquire the lock if we
2205 : : * haven't already.
2206 : : */
2207 : 0 : res = fixup_owner(uaddr, &q, !ret);
2208 : : /*
2209 : :         * If fixup_owner() returned an error, propagate that.  If it acquired
2210 : : * the lock, clear our -ETIMEDOUT or -EINTR.
2211 : : */
2212 [ # # ]: 0 : if (res)
2213 : 0 : ret = (res < 0) ? res : 0;
2214 : :
2215 : : /*
2216 : : * If fixup_owner() faulted and was unable to handle the fault, unlock
2217 : : * it and return the fault to userspace.
2218 : : */
2219 [ # # ][ # # ]: 0 : if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
2220 : 0 : rt_mutex_unlock(&q.pi_state->pi_mutex);
2221 : :
2222 : : /* Unqueue and drop the lock */
2223 : 0 : unqueue_me_pi(&q);
2224 : :
2225 : : goto out_put_key;
2226 : :
2227 : : out_unlock_put_key:
2228 : : queue_unlock(hb);
2229 : :
2230 : : out_put_key:
2231 : : put_futex_key(&q.key);
2232 : : out:
2233 : : if (to)
2234 : : destroy_hrtimer_on_stack(&to->timer);
2235 [ # # ]: 0 : return ret != -EINTR ? ret : -ERESTARTNOINTR;
2236 : :
2237 : : uaddr_faulted:
2238 : : queue_unlock(hb);
2239 : :
2240 : 0 : ret = fault_in_user_writeable(uaddr);
2241 [ # # ]: 0 : if (ret)
2242 : : goto out_put_key;
2243 : :
2244 [ # # ]: 0 : if (!(flags & FLAGS_SHARED))
2245 : : goto retry_private;
2246 : :
2247 : : put_futex_key(&q.key);
2248 : : goto retry;
2249 : : }
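
The 0 -> TID transition mentioned above happens in userspace first; only on
failure does this function run. A hedged sketch of that fast path (pi_lock
is a made-up helper; a real implementation needs timeout and error
handling):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdatomic.h>
    #include <stdint.h>

    static void pi_lock(_Atomic uint32_t *futex_word)
    {
            uint32_t expected = 0;
            uint32_t tid = (uint32_t)syscall(SYS_gettid);

            /* Uncontended: 0 -> TID in userspace, no syscall at all. */
            if (atomic_compare_exchange_strong(futex_word, &expected, tid))
                    return;

            /* Contended: the kernel queues us, applies priority
             * inheritance and assigns ownership (see above). */
            syscall(SYS_futex, futex_word, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
    }
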
2250 : :
2251 : : /*
2252 : : * Userspace attempted a TID -> 0 atomic transition, and failed.
2253 : : * This is the in-kernel slowpath: we look up the PI state (if any),
2254 : : * and do the rt-mutex unlock.
2255 : : */
2256 : 0 : static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
2257 : : {
2258 : : struct futex_hash_bucket *hb;
2259 : 0 : struct futex_q *this, *next;
2260 : 0 : union futex_key key = FUTEX_KEY_INIT;
2261 : 0 : u32 uval, vpid = task_pid_vnr(current);
2262 : : int ret;
2263 : :
2264 : : retry:
2265 [ # # ]: 0 : if (get_user(uval, uaddr))
2266 : : return -EFAULT;
2267 : : /*
2268 : : * We release only a lock we actually own:
2269 : : */
2270 [ # # ]: 0 : if ((uval & FUTEX_TID_MASK) != vpid)
2271 : : return -EPERM;
2272 : :
2273 : 0 : ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
2274 [ # # ]: 0 : if (unlikely(ret != 0))
2275 : : goto out;
2276 : :
2277 : 0 : hb = hash_futex(&key);
2278 : : spin_lock(&hb->lock);
2279 : :
2280 : : /*
2281 : : * To avoid races, try to do the TID -> 0 atomic transition
2282 : : * again. If it succeeds then we can return without waking
2283 : : * anyone else up:
2284 : : */
2285 [ # # # # ]: 0 : if (!(uval & FUTEX_OWNER_DIED) &&
2286 : 0 : cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
2287 : : goto pi_faulted;
2288 : : /*
2289 : : * Rare case: we managed to release the lock atomically,
2290 : : * no need to wake anyone else up:
2291 : : */
2292 [ # # ]: 0 : if (unlikely(uval == vpid))
2293 : : goto out_unlock;
2294 : :
2295 : : /*
2296 : : * Ok, other tasks may need to be woken up - check waiters
2297 : : * and do the wakeup if necessary:
2298 : : */
2299 [ # # ]: 0 : plist_for_each_entry_safe(this, next, &hb->chain, list) {
2300 [ # # ]:          0 :      plist_for_each_entry_safe(this, next, &hb->chain, list) {
2301 [ # # ]:          0 :              if (!match_futex(&this->key, &key))
2301 : 0 : continue;
2302 : 0 : ret = wake_futex_pi(uaddr, uval, this);
2303 : : /*
2304 : : * The atomic access to the futex value
2305 : : * generated a pagefault, so retry the
2306 : : * user-access and the wakeup:
2307 : : */
2308 [ # # ]: 0 : if (ret == -EFAULT)
2309 : : goto pi_faulted;
2310 : : goto out_unlock;
2311 : : }
2312 : : /*
2313 : : * No waiters - kernel unlocks the futex:
2314 : : */
2315 [ # # ]: 0 : if (!(uval & FUTEX_OWNER_DIED)) {
2316 : 0 : ret = unlock_futex_pi(uaddr, uval);
2317 [ # # ]: 0 : if (ret == -EFAULT)
2318 : : goto pi_faulted;
2319 : : }
2320 : :
2321 : : out_unlock:
2322 : : spin_unlock(&hb->lock);
2323 : : put_futex_key(&key);
2324 : :
2325 : : out:
2326 : 0 : return ret;
2327 : :
2328 : : pi_faulted:
2329 : : spin_unlock(&hb->lock);
2330 : : put_futex_key(&key);
2331 : :
2332 : 0 : ret = fault_in_user_writeable(uaddr);
2333 [ # # ]: 0 : if (!ret)
2334 : : goto retry;
2335 : :
2336 : : return ret;
2337 : : }
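
The matching userspace unlock, continuing the pi_lock() sketch above (same
includes; pi_unlock is likewise a made-up helper):

    static void pi_unlock(_Atomic uint32_t *futex_word)
    {
            uint32_t expected = (uint32_t)syscall(SYS_gettid);

            /* No waiters, no state bits: TID -> 0 in userspace. */
            if (atomic_compare_exchange_strong(futex_word, &expected, 0))
                    return;

            /* FUTEX_WAITERS (or OWNER_DIED) is set: the kernel hands
             * the rt_mutex to the top waiter and fixes up the word. */
            syscall(SYS_futex, futex_word, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
    }
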
2338 : :
2339 : : /**
2340 : : * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
2341 : :  * @hb:         the hash_bucket futex_q was originally enqueued on
2342 : : * @q: the futex_q woken while waiting to be requeued
2343 : : * @key2: the futex_key of the requeue target futex
2344 : : * @timeout: the timeout associated with the wait (NULL if none)
2345 : : *
2346 : : * Detect if the task was woken on the initial futex as opposed to the requeue
2347 : : * target futex. If so, determine if it was a timeout or a signal that caused
2348 : : * the wakeup and return the appropriate error code to the caller. Must be
2349 : : * called with the hb lock held.
2350 : : *
2351 : : * Return:
2352 : : * 0 = no early wakeup detected;
2353 : : * <0 = -ETIMEDOUT or -ERESTARTNOINTR
2354 : : */
2355 : : static inline
2356 : : int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2357 : : struct futex_q *q, union futex_key *key2,
2358 : : struct hrtimer_sleeper *timeout)
2359 : : {
2360 : : int ret = 0;
2361 : :
2362 : : /*
2363 : : * With the hb lock held, we avoid races while we process the wakeup.
2364 : : * We only need to hold hb (and not hb2) to ensure atomicity as the
2365 : : * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
2366 : : * It can't be requeued from uaddr2 to something else since we don't
2367 : : * support a PI aware source futex for requeue.
2368 : : */
2369 [ # # ]: 0 : if (!match_futex(&q->key, key2)) {
2370 [ # # ][ # # ]: 0 : WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
[ # # ]
2371 : : /*
2372 : : * We were woken prior to requeue by a timeout or a signal.
2373 : : * Unqueue the futex_q and determine which it was.
2374 : : */
2375 : 0 : plist_del(&q->list, &hb->chain);
2376 : : hb_waiters_dec(hb);
2377 : :
2378 : : /* Handle spurious wakeups gracefully */
2379 : : ret = -EWOULDBLOCK;
2380 [ # # ][ # # ]: 0 : if (timeout && !timeout->task)
2381 : : ret = -ETIMEDOUT;
2382 [ # # ]: 0 : else if (signal_pending(current))
2383 : : ret = -ERESTARTNOINTR;
2384 : : }
2385 : : return ret;
2386 : : }
2387 : :
2388 : : /**
2389 : : * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
2390 : : * @uaddr: the futex we initially wait on (non-pi)
2391 : : * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
2392 : : * the same type, no requeueing from private to shared, etc.
2393 : : * @val: the expected value of uaddr
2394 : : * @abs_time: absolute timeout
2395 : : * @bitset: 32 bit wakeup bitset set by userspace, defaults to all
2396 : : * @uaddr2: the pi futex we will take prior to returning to user-space
2397 : : *
2398 : : * The caller will wait on uaddr and will be requeued by futex_requeue() to
2399 : : * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake
2400 : : * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
2401 : : * userspace. This ensures the rt_mutex maintains an owner when it has waiters;
2402 : : * without one, the pi logic would not know which task to boost/deboost, if
2403 : : * there was a need to.
2404 : : *
2405 : :  * We call schedule() in futex_wait_queue_me() when we enqueue and return there
2406 : :  * via one of the following:
2407 : : * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
2408 : : * 2) wakeup on uaddr2 after a requeue
2409 : : * 3) signal
2410 : : * 4) timeout
2411 : : *
2412 : : * If 3, cleanup and return -ERESTARTNOINTR.
2413 : : *
2414 : : * If 2, we may then block on trying to take the rt_mutex and return via:
2415 : : * 5) successful lock
2416 : : * 6) signal
2417 : : * 7) timeout
2418 : : * 8) other lock acquisition failure
2419 : : *
2420 : : * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
2421 : : *
2422 : : * If 4 or 7, we cleanup and return with -ETIMEDOUT.
2423 : : *
2424 : : * Return:
2425 : : * 0 - On success;
2426 : : * <0 - On error
2427 : : */
2428 : 0 : static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2429 : : u32 val, ktime_t *abs_time, u32 bitset,
2430 : : u32 __user *uaddr2)
2431 : : {
2432 : : struct hrtimer_sleeper timeout, *to = NULL;
2433 : : struct rt_mutex_waiter rt_waiter;
2434 : 0 : struct rt_mutex *pi_mutex = NULL;
2435 : : struct futex_hash_bucket *hb;
2436 : 0 : union futex_key key2 = FUTEX_KEY_INIT;
2437 : 0 : struct futex_q q = futex_q_init;
2438 : : int res, ret;
2439 : :
2440 [ # # ]: 0 : if (uaddr == uaddr2)
2441 : : return -EINVAL;
2442 : :
2443 [ # # ]: 0 : if (!bitset)
2444 : : return -EINVAL;
2445 : :
2446 [ # # ]: 0 : if (abs_time) {
2447 : : to = &timeout;
2448 : 0 : hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2449 : : CLOCK_REALTIME : CLOCK_MONOTONIC,
2450 : : HRTIMER_MODE_ABS);
2451 : 0 : hrtimer_init_sleeper(to, current);
2452 : 0 : hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2453 : 0 : current->timer_slack_ns);
2454 : : }
2455 : :
2456 : : /*
2457 : : * The waiter is allocated on our stack, manipulated by the requeue
2458 : : * code while we sleep on uaddr.
2459 : : */
2460 : : debug_rt_mutex_init_waiter(&rt_waiter);
2461 : 0 : RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
2462 : 0 : RB_CLEAR_NODE(&rt_waiter.tree_entry);
2463 : 0 : rt_waiter.task = NULL;
2464 : :
2465 : 0 : ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
2466 [ # # ]: 0 : if (unlikely(ret != 0))
2467 : : goto out;
2468 : :
2469 : 0 : q.bitset = bitset;
2470 : 0 : q.rt_waiter = &rt_waiter;
2471 : 0 : q.requeue_pi_key = &key2;
2472 : :
2473 : : /*
2474 : : * Prepare to wait on uaddr. On success, increments q.key (key1) ref
2475 : : * count.
2476 : : */
2477 : 0 : ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
2478 [ # # ]: 0 : if (ret)
2479 : : goto out_key2;
2480 : :
2481 : : /* Queue the futex_q, drop the hb lock, wait for wakeup. */
2482 : 0 : futex_wait_queue_me(hb, &q, to);
2483 : :
2484 : 0 : spin_lock(&hb->lock);
2485 : 0 : ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
2486 : 0 : spin_unlock(&hb->lock);
2487 [ # # ]: 0 : if (ret)
2488 : : goto out_put_keys;
2489 : :
2490 : : /*
2491 : : * In order for us to be here, we know our q.key == key2, and since
2492 : : * we took the hb->lock above, we also know that futex_requeue() has
2493 : : * completed and we no longer have to concern ourselves with a wakeup
2494 : : * race with the atomic proxy lock acquisition by the requeue code. The
2495 : : * futex_requeue dropped our key1 reference and incremented our key2
2496 : : * reference count.
2497 : : */
2498 : :
2499 : : /* Check if the requeue code acquired the second futex for us. */
2500 [ # # ]: 0 : if (!q.rt_waiter) {
2501 : : /*
2502 : : * Got the lock. We might not be the anticipated owner if we
2503 : : * did a lock-steal - fix up the PI-state in that case.
2504 : : */
2505 [ # # ][ # # ]: 0 : if (q.pi_state && (q.pi_state->owner != current)) {
2506 : 0 : spin_lock(q.lock_ptr);
2507 : 0 : ret = fixup_pi_state_owner(uaddr2, &q, current);
2508 : 0 : spin_unlock(q.lock_ptr);
2509 : : }
2510 : : } else {
2511 : : /*
2512 : : * We have been woken up by futex_unlock_pi(), a timeout, or a
2513 : : * signal. futex_unlock_pi() will not destroy the lock_ptr nor
2514 : : * the pi_state.
2515 : : */
2516 [ # # ]: 0 : WARN_ON(!q.pi_state);
2517 : 0 : pi_mutex = &q.pi_state->pi_mutex;
2518 : 0 : ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
2519 : : debug_rt_mutex_free_waiter(&rt_waiter);
2520 : :
2521 : 0 : spin_lock(q.lock_ptr);
2522 : : /*
2523 : : * Fixup the pi_state owner and possibly acquire the lock if we
2524 : : * haven't already.
2525 : : */
2526 : 0 : res = fixup_owner(uaddr2, &q, !ret);
2527 : : /*
2528 : :                 * If fixup_owner() returned an error, propagate that.  If it
2529 : : * acquired the lock, clear -ETIMEDOUT or -EINTR.
2530 : : */
2531 [ # # ]: 0 : if (res)
2532 : 0 : ret = (res < 0) ? res : 0;
2533 : :
2534 : : /* Unqueue and drop the lock. */
2535 : 0 : unqueue_me_pi(&q);
2536 : : }
2537 : :
2538 : : /*
2539 : : * If fixup_pi_state_owner() faulted and was unable to handle the
2540 : : * fault, unlock the rt_mutex and return the fault to userspace.
2541 : : */
2542 [ # # ]: 0 : if (ret == -EFAULT) {
2543 [ # # ][ # # ]: 0 : if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
2544 : 0 : rt_mutex_unlock(pi_mutex);
2545 [ # # ]: 0 : } else if (ret == -EINTR) {
2546 : : /*
2547 : : * We've already been requeued, but cannot restart by calling
2548 : : * futex_lock_pi() directly. We could restart this syscall, but
2549 : : * it would detect that the user space "val" changed and return
2550 : : * -EWOULDBLOCK. Save the overhead of the restart and return
2551 : : * -EWOULDBLOCK directly.
2552 : : */
2553 : : ret = -EWOULDBLOCK;
2554 : : }
2555 : :
2556 : : out_put_keys:
2557 : : put_futex_key(&q.key);
2558 : : out_key2:
2559 : : put_futex_key(&key2);
2560 : :
2561 : : out:
2562 [ # # ]: 0 : if (to) {
2563 : 0 : hrtimer_cancel(&to->timer);
2564 : : destroy_hrtimer_on_stack(&to->timer);
2565 : : }
2566 : 0 : return ret;
2567 : : }
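
A sketch of how a condvar-like construct might pair the two requeue-PI
operations, reusing the pi_lock()/pi_unlock() sketches above plus
<limits.h> (cond_wait_pi/cond_signal_pi are hypothetical; glibc's real
condvar code is considerably more involved). Note how nr_requeue rides in
the timeout argument slot and the expected value in val3, matching
do_futex() below:

    static void cond_wait_pi(_Atomic uint32_t *cond, _Atomic uint32_t *mutex)
    {
            uint32_t seq = atomic_load(cond);

            pi_unlock(mutex);
            /* Block on cond; a successful return means the kernel has
             * requeued us onto mutex and acquired it for us.  On error
             * (EAGAIN, ETIMEDOUT) a real implementation must reacquire
             * mutex itself. */
            syscall(SYS_futex, cond, FUTEX_WAIT_REQUEUE_PI, seq,
                    NULL, mutex, 0);
    }

    static void cond_signal_pi(_Atomic uint32_t *cond, _Atomic uint32_t *mutex)
    {
            uint32_t seq = atomic_fetch_add(cond, 1) + 1;

            /* Wake at most one waiter, requeue the rest onto the PI
             * mutex. */
            syscall(SYS_futex, cond, FUTEX_CMP_REQUEUE_PI, 1,
                    (void *)(unsigned long)INT_MAX, mutex, seq);
    }
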
2568 : :
2569 : : /*
2570 : : * Support for robust futexes: the kernel cleans up held futexes at
2571 : : * thread exit time.
2572 : : *
2573 : : * Implementation: user-space maintains a per-thread list of locks it
2574 : : * is holding. Upon do_exit(), the kernel carefully walks this list,
2575 : : * and marks all locks that are owned by this thread with the
2576 : : * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
2577 : : * always manipulated with the lock held, so the list is private and
2578 : : * per-thread. Userspace also maintains a per-thread 'list_op_pending'
2579 : : * field, to allow the kernel to clean up if the thread dies after
2580 : : * acquiring the lock, but just before it could have added itself to
2581 : : * the list. There can only be one such pending lock.
2582 : : */
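
On the userspace side, registration looks like the following sketch
(struct my_robust_mutex and register_robust_list are illustrative;
struct robust_list_head comes from <linux/futex.h>):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stddef.h>

    struct my_robust_mutex {
            struct robust_list list;        /* linked while held      */
            unsigned int futex_word;        /* owner TID, 0 when free */
    };

    static struct robust_list_head head = {
            .list            = { &head.list },      /* empty: self-pointing */
            .futex_offset    = offsetof(struct my_robust_mutex, futex_word)
                             - offsetof(struct my_robust_mutex, list),
            .list_op_pending = NULL,
    };

    static long register_robust_list(void)
    {
            /* The kernel accepts exactly sizeof(*head); see below. */
            return syscall(SYS_set_robust_list, &head, sizeof(head));
    }
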
2583 : :
2584 : : /**
2585 : : * sys_set_robust_list() - Set the robust-futex list head of a task
2586 : : * @head: pointer to the list-head
2587 : : * @len: length of the list-head, as userspace expects
2588 : : */
2589 : 0 : SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
2590 : : size_t, len)
2591 : : {
2592 [ + ]: 11680 : if (!futex_cmpxchg_enabled)
2593 : : return -ENOSYS;
2594 : : /*
2595 : : * The kernel knows only one size for now:
2596 : : */
2597 [ + ]: 11684 : if (unlikely(len != sizeof(*head)))
2598 : : return -EINVAL;
2599 : :
2600 : 11688 : current->robust_list = head;
2601 : :
2602 : : return 0;
2603 : : }
2604 : :
2605 : : /**
2606 : : * sys_get_robust_list() - Get the robust-futex list head of a task
2607 : : * @pid: pid of the process [zero for current task]
2608 : : * @head_ptr: pointer to a list-head pointer, the kernel fills it in
2609 : : * @len_ptr: pointer to a length field, the kernel fills in the header size
2610 : : */
2611 : 0 : SYSCALL_DEFINE3(get_robust_list, int, pid,
2612 : : struct robust_list_head __user * __user *, head_ptr,
2613 : : size_t __user *, len_ptr)
2614 : : {
2615 : : struct robust_list_head __user *head;
2616 : : unsigned long ret;
2617 : : struct task_struct *p;
2618 : :
2619 [ + - ]: 5 : if (!futex_cmpxchg_enabled)
2620 : : return -ENOSYS;
2621 : :
2622 : : rcu_read_lock();
2623 : :
2624 : : ret = -ESRCH;
2625 [ + + ]: 5 : if (!pid)
2626 : 3 : p = current;
2627 : : else {
2628 : 2 : p = find_task_by_vpid(pid);
2629 [ + + ]: 2 : if (!p)
2630 : : goto err_unlock;
2631 : : }
2632 : :
2633 : : ret = -EPERM;
2634 [ + + ]: 4 : if (!ptrace_may_access(p, PTRACE_MODE_READ))
2635 : : goto err_unlock;
2636 : :
2637 : 3 : head = p->robust_list;
2638 : : rcu_read_unlock();
2639 : :
2640 [ + + ]: 3 : if (put_user(sizeof(*head), len_ptr))
2641 : : return -EFAULT;
2642 : 2 : return put_user(head, head_ptr);
2643 : :
2644 : : err_unlock:
2645 : : rcu_read_unlock();
2646 : :
2647 : 2 : return ret;
2648 : : }
2649 : :
2650 : : /*
2651 : : * Process a futex-list entry, check whether it's owned by the
2652 : : * dying task, and do notification if so:
2653 : : */
2654 : 1 : int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
2655 : : {
2656 : : u32 uval, uninitialized_var(nval), mval;
2657 : :
2658 : : retry:
2659 [ - + ]: 1 : if (get_user(uval, uaddr))
2660 : : return -1;
2661 : :
2662 [ # # ]: 0 : if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
2663 : : /*
2664 : : * Ok, this dying thread is truly holding a futex
2665 : : * of interest. Set the OWNER_DIED bit atomically
2666 : : * via cmpxchg, and if the value had FUTEX_WAITERS
2667 : : * set, wake up a waiter (if any). (We have to do a
2668 : : * futex_wake() even if OWNER_DIED is already set -
2669 : : * to handle the rare but possible case of recursive
2670 : : * thread-death.) The rest of the cleanup is done in
2671 : : * userspace.
2672 : : */
2673 : 0 : mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
2674 : : /*
2675 : : * We are not holding a lock here, but we want to have
2676 : : * the pagefault_disable/enable() protection because
2677 : : * we want to handle the fault gracefully. If the
2678 : : * access fails we try to fault in the futex with R/W
2679 : : * verification via get_user_pages. get_user() above
2680 : : * does not guarantee R/W access. If that fails we
2681 : : * give up and leave the futex locked.
2682 : : */
2683 [ # # ]: 0 : if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
2684 [ # # ]: 0 : if (fault_in_user_writeable(uaddr))
2685 : : return -1;
2686 : : goto retry;
2687 : : }
2688 [ # # ]: 0 : if (nval != uval)
2689 : : goto retry;
2690 : :
2691 : : /*
2692 : : * Wake robust non-PI futexes here. The wakeup of
2693 : : * PI futexes happens in exit_pi_state():
2694 : : */
2695 [ # # ]: 0 : if (!pi && (uval & FUTEX_WAITERS))
2696 : 0 : futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
2697 : : }
2698 : : return 0;
2699 : : }
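
A woken waiter then completes the recovery in userspace. A hedged sketch
(robust_lock is a made-up helper building on the earlier includes plus
<errno.h>; a production version would also set FUTEX_WAITERS before
sleeping and maintain list_op_pending around the acquisition):

    static int robust_lock(_Atomic uint32_t *futex_word, uint32_t tid)
    {
            uint32_t expected = 0;

            for (;;) {
                    if (atomic_compare_exchange_strong(futex_word,
                                                       &expected, tid))
                            return 0;       /* clean acquisition */
                    if (expected & FUTEX_OWNER_DIED) {
                            /* Take over the dead owner's lock; the bit
                             * stays set until the caller has made the
                             * protected state consistent again. */
                            if (atomic_compare_exchange_strong(futex_word,
                                    &expected, tid | FUTEX_OWNER_DIED))
                                    return EOWNERDEAD;
                    }
                    /* Still owned: block (error handling elided). */
                    syscall(SYS_futex, futex_word, FUTEX_WAIT, expected,
                            NULL, NULL, 0);
                    expected = 0;
            }
    }
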
2700 : :
2701 : : /*
2702 : : * Fetch a robust-list pointer. Bit 0 signals PI futexes:
2703 : : */
2704 : : static inline int fetch_robust_entry(struct robust_list __user **entry,
2705 : : struct robust_list __user * __user *head,
2706 : : unsigned int *pi)
2707 : : {
2708 : : unsigned long uentry;
2709 : :
2710 [ + - ][ + - ]: 23380 : if (get_user(uentry, (unsigned long __user *)head))
[ + - ]
2711 : : return -EFAULT;
2712 : :
2713 : 23379 : *entry = (void __user *)(uentry & ~1UL);
2714 : 23379 : *pi = uentry & 1;
2715 : :
2716 : : return 0;
2717 : : }
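
Userspace sets that bit when it links a PI lock into the list; a tiny
illustrative helper (encode_robust_entry is hypothetical):

    static inline struct robust_list *
    encode_robust_entry(struct robust_list *entry, int is_pi)
    {
            /* Bit 0 tags the entry as a PI futex, as decoded above. */
            return (struct robust_list *)((unsigned long)entry |
                                          (is_pi ? 1UL : 0UL));
    }
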
2718 : :
2719 : : /*
2720 : : * Walk curr->robust_list (very carefully, it's a userspace list!)
2721 : : * and mark any locks found there dead, and notify any waiters.
2722 : : *
2723 : : * We silently return on any sign of list-walking problem.
2724 : : */
2725 : 0 : void exit_robust_list(struct task_struct *curr)
2726 : : {
2727 : 11690 : struct robust_list_head __user *head = curr->robust_list;
2728 : : struct robust_list __user *entry, *next_entry, *pending;
2729 : : unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
2730 : : unsigned int uninitialized_var(next_pi);
2731 : : unsigned long futex_offset;
2732 : : int rc;
2733 : :
2734 [ + - ]: 11690 : if (!futex_cmpxchg_enabled)
2735 : : return;
2736 : :
2737 : : /*
2738 : : * Fetch the list head (which was registered earlier, via
2739 : : * sys_set_robust_list()):
2740 : : */
2741 [ + - ]: 11690 : if (fetch_robust_entry(&entry, &head->list.next, &pi))
2742 : : return;
2743 : : /*
2744 : : * Fetch the relative futex offset:
2745 : : */
2746 [ + + ]: 11690 : if (get_user(futex_offset, &head->futex_offset))
2747 : : return;
2748 : : /*
2749 : : * Fetch any possibly pending lock-add first, and handle it
2750 : : * if it exists:
2751 : : */
2752 [ + - ]: 11688 : if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
2753 : : return;
2754 : :
2755 : : next_entry = NULL; /* avoid warning with gcc */
2756 [ + + ]: 11689 : while (entry != &head->list) {
2757 : : /*
2758 : : * Fetch the next entry in the list before calling
2759 : : * handle_futex_death:
2760 : : */
2761 : 1 : rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
2762 : : /*
2763 : : * A pending lock might already be on the list, so
2764 : : * don't process it twice:
2765 : : */
2766 [ + - ]: 1 : if (entry != pending)
2767 [ - + ]: 1 : if (handle_futex_death((void __user *)entry + futex_offset,
2768 : : curr, pi))
2769 : : return;
2770 [ # # ]: 0 : if (rc)
2771 : : return;
2772 : : entry = next_entry;
2773 : : pi = next_pi;
2774 : : /*
2775 : : * Avoid excessively long or circular lists:
2776 : : */
2777 [ # # ]: 0 : if (!--limit)
2778 : : break;
2779 : :
2780 : 0 : cond_resched();
2781 : : }
2782 : :
2783 [ - + ]: 11688 : if (pending)
2784 : 0 : handle_futex_death((void __user *)pending + futex_offset,
2785 : : curr, pip);
2786 : : }
2787 : :
2788 : 0 : long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2789 : : u32 __user *uaddr2, u32 val2, u32 val3)
2790 : : {
2791 : 11981012 : int cmd = op & FUTEX_CMD_MASK;
2792 : : unsigned int flags = 0;
2793 : :
2794 [ + + ]: 11981012 : if (!(op & FUTEX_PRIVATE_FLAG))
2795 : : flags |= FLAGS_SHARED;
2796 : :
2797 [ + + ]: 11981012 : if (op & FUTEX_CLOCK_REALTIME) {
2798 : 493259 : flags |= FLAGS_CLOCKRT;
2799 [ + ][ + + ]: 11981012 : if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
[ + - ]
2800 : : return -ENOSYS;
2801 : : }
2802 : :
2803 : : switch (cmd) {
2804 : : case FUTEX_LOCK_PI:
2805 : : case FUTEX_UNLOCK_PI:
2806 : : case FUTEX_TRYLOCK_PI:
2807 : : case FUTEX_WAIT_REQUEUE_PI:
2808 : : case FUTEX_CMP_REQUEUE_PI:
2809 [ # # ]: 0 : if (!futex_cmpxchg_enabled)
2810 : : return -ENOSYS;
2811 : : }
2812 : :
2813 [ + + + - : 12307462 : switch (cmd) {
- + + - -
- - - - ]
2814 : : case FUTEX_WAIT:
2815 : 3486250 : val3 = FUTEX_BITSET_MATCH_ANY;
2816 : : case FUTEX_WAIT_BITSET:
2817 : 3979509 : return futex_wait(uaddr, flags, val, timeout, val3);
2818 : : case FUTEX_WAKE:
2819 : 7732806 : val3 = FUTEX_BITSET_MATCH_ANY;
2820 : : case FUTEX_WAKE_BITSET:
2821 : 7732806 : return futex_wake(uaddr, flags, val, val3);
2822 : : case FUTEX_REQUEUE:
2823 : 0 : return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
2824 : : case FUTEX_CMP_REQUEUE:
2825 : 169 : return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
2826 : : case FUTEX_WAKE_OP:
2827 : 594978 : return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
2828 : : case FUTEX_LOCK_PI:
2829 : 0 : return futex_lock_pi(uaddr, flags, val, timeout, 0);
2830 : : case FUTEX_UNLOCK_PI:
2831 : 0 : return futex_unlock_pi(uaddr, flags);
2832 : : case FUTEX_TRYLOCK_PI:
2833 : 0 : return futex_lock_pi(uaddr, flags, 0, timeout, 1);
2834 : : case FUTEX_WAIT_REQUEUE_PI:
2835 : 0 : val3 = FUTEX_BITSET_MATCH_ANY;
2836 : 0 : return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
2837 : : uaddr2);
2838 : : case FUTEX_CMP_REQUEUE_PI:
2839 : 0 : return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
2840 : : }
2841 : : return -ENOSYS;
2842 : : }
2843 : :
2844 : :
2845 : 0 : SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
2846 : : struct timespec __user *, utime, u32 __user *, uaddr2,
2847 : : u32, val3)
2848 : : {
2849 : : struct timespec ts;
2850 : : ktime_t t, *tp = NULL;
2851 : : u32 val2 = 0;
2852 : 12147884 : int cmd = op & FUTEX_CMD_MASK;
2853 : :
2854 [ + + ][ + - ]: 12147884 : if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
2855 [ + + ]: 1088406 : cmd == FUTEX_WAIT_BITSET ||
2856 : 1088406 : cmd == FUTEX_WAIT_REQUEUE_PI)) {
2857 [ + - ]: 493259 : if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
2858 : : return -EFAULT;
2859 [ + - ]: 493259 : if (!timespec_valid(&ts))
2860 : : return -EINVAL;
2861 : :
2862 : 493259 : t = timespec_to_ktime(ts);
2863 [ - + ]: 493259 : if (cmd == FUTEX_WAIT)
2864 : 0 : t = ktime_add_safe(ktime_get(), t);
2865 : : tp = &t;
2866 : : }
2867 : : /*
2868 : : * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
2869 : : * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
2870 : : */
2871 [ + + ]: 12102678 : if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
2872 [ + + ]: 12098260 : cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
2873 : 595147 : val2 = (u32) (unsigned long) utime;
2874 : :
2875 : 12102678 : return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
2876 : : }
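
Note the asymmetry visible above: plain FUTEX_WAIT timeouts are relative
(the ktime_add_safe() branch), while FUTEX_WAIT_BITSET takes an absolute
time. A small userspace sketch of both (error handling elided):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <time.h>
    #include <unistd.h>
    #include <stdint.h>

    static void wait_examples(uint32_t *uaddr, uint32_t val)
    {
            /* Relative: give up one second from now. */
            struct timespec rel = { .tv_sec = 1 };
            syscall(SYS_futex, uaddr, FUTEX_WAIT, val, &rel, NULL, 0);

            /* Absolute deadline against CLOCK_MONOTONIC (no
             * FUTEX_CLOCK_REALTIME flag set). */
            struct timespec abs;
            clock_gettime(CLOCK_MONOTONIC, &abs);
            abs.tv_sec += 1;
            syscall(SYS_futex, uaddr, FUTEX_WAIT_BITSET, val, &abs,
                    NULL, FUTEX_BITSET_MATCH_ANY);
    }
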
2877 : :
2878 : 0 : static int __init futex_init(void)
2879 : : {
2880 : : u32 curval;
2881 : : unsigned int futex_shift;
2882 : : unsigned long i;
2883 : :
2884 : : #if CONFIG_BASE_SMALL
2885 : : futex_hashsize = 16;
2886 : : #else
2887 : 0 : futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
2888 : : #endif
2889 : :
2890 [ # # ]: 0 : futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
2891 : : futex_hashsize, 0,
2892 : : futex_hashsize < 256 ? HASH_SMALL : 0,
2893 : : &futex_shift, NULL,
2894 : : futex_hashsize, futex_hashsize);
2895 : 0 : futex_hashsize = 1UL << futex_shift;
2896 : : /*
2897 : :         * This will fail, and that is what we want. Some arch implementations do
2898 : : * runtime detection of the futex_atomic_cmpxchg_inatomic()
2899 : : * functionality. We want to know that before we call in any
2900 : : * of the complex code paths. Also we want to prevent
2901 : : * registration of robust lists in that case. NULL is
2902 : : * guaranteed to fault and we get -EFAULT on functional
2903 : : * implementation, the non-functional ones will return
2904 : : * -ENOSYS.
2905 : : */
2906 [ # # ]: 0 : if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2907 : 0 : futex_cmpxchg_enabled = 1;
2908 : :
2909 [ # # ]: 0 : for (i = 0; i < futex_hashsize; i++) {
2910 : 0 : atomic_set(&futex_queues[i].waiters, 0);
2911 : : plist_head_init(&futex_queues[i].chain);
2912 : 0 : spin_lock_init(&futex_queues[i].lock);
2913 : : }
2914 : :
2915 : 0 : return 0;
2916 : : }
2917 : : __initcall(futex_init);