Branch data Line data Source code
1 : : /*
2 : : * Fast Userspace Mutexes (which I call "Futexes!").
3 : : * (C) Rusty Russell, IBM 2002
4 : : *
5 : : * Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
6 : : * (C) Copyright 2003 Red Hat Inc, All Rights Reserved
7 : : *
8 : : * Removed page pinning, fix privately mapped COW pages and other cleanups
9 : : * (C) Copyright 2003, 2004 Jamie Lokier
10 : : *
11 : : * Robust futex support started by Ingo Molnar
12 : : * (C) Copyright 2006 Red Hat Inc, All Rights Reserved
13 : : * Thanks to Thomas Gleixner for suggestions, analysis and fixes.
14 : : *
15 : : * PI-futex support started by Ingo Molnar and Thomas Gleixner
16 : : * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
17 : : * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
18 : : *
19 : : * PRIVATE futexes by Eric Dumazet
20 : : * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
21 : : *
22 : : * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
23 : : * Copyright (C) IBM Corporation, 2009
24 : : * Thanks to Thomas Gleixner for conceptual design and careful reviews.
25 : : *
26 : : * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
27 : : * enough at me, Linus for the original (flawed) idea, Matthew
28 : : * Kirkwood for proof-of-concept implementation.
29 : : *
30 : : * "The futexes are also cursed."
31 : : * "But they come in a choice of three flavours!"
32 : : *
33 : : * This program is free software; you can redistribute it and/or modify
34 : : * it under the terms of the GNU General Public License as published by
35 : : * the Free Software Foundation; either version 2 of the License, or
36 : : * (at your option) any later version.
37 : : *
38 : : * This program is distributed in the hope that it will be useful,
39 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
40 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41 : : * GNU General Public License for more details.
42 : : *
43 : : * You should have received a copy of the GNU General Public License
44 : : * along with this program; if not, write to the Free Software
45 : : * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
46 : : */
47 : : #include <linux/slab.h>
48 : : #include <linux/poll.h>
49 : : #include <linux/fs.h>
50 : : #include <linux/file.h>
51 : : #include <linux/jhash.h>
52 : : #include <linux/init.h>
53 : : #include <linux/futex.h>
54 : : #include <linux/mount.h>
55 : : #include <linux/pagemap.h>
56 : : #include <linux/syscalls.h>
57 : : #include <linux/signal.h>
58 : : #include <linux/export.h>
59 : : #include <linux/magic.h>
60 : : #include <linux/pid.h>
61 : : #include <linux/nsproxy.h>
62 : : #include <linux/ptrace.h>
63 : : #include <linux/sched/rt.h>
64 : : #include <linux/hugetlb.h>
65 : : #include <linux/freezer.h>
66 : :
67 : : #include <asm/futex.h>
68 : :
69 : : #include "locking/rtmutex_common.h"
70 : :
71 : : int __read_mostly futex_cmpxchg_enabled;
72 : :
73 : : #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
74 : :
75 : : /*
76 : : * Futex flags used to encode options to functions and preserve them across
77 : : * restarts.
78 : : */
79 : : #define FLAGS_SHARED 0x01
80 : : #define FLAGS_CLOCKRT 0x02
81 : : #define FLAGS_HAS_TIMEOUT 0x04
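For reference, these internal flag bits are derived from the op word userspace passes to the futex syscall; a minimal sketch of that translation, assuming it mirrors the dispatch logic in do_futex() later in this file (the helper name is hypothetical):

/* Illustration only, not part of this file. */
static unsigned int example_op_to_flags(int op)
{
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))		/* shared unless PRIVATE was requested */
		flags |= FLAGS_SHARED;
	if (op & FUTEX_CLOCK_REALTIME)		/* timeout measured against CLOCK_REALTIME */
		flags |= FLAGS_CLOCKRT;
	return flags;
}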
82 : :
83 : : /*
84 : : * Priority Inheritance state:
85 : : */
86 : : struct futex_pi_state {
87 : : /*
88 : : * list of 'owned' pi_state instances - these have to be
89 : : * cleaned up in do_exit() if the task exits prematurely:
90 : : */
91 : : struct list_head list;
92 : :
93 : : /*
94 : : * The PI object:
95 : : */
96 : : struct rt_mutex pi_mutex;
97 : :
98 : : struct task_struct *owner;
99 : : atomic_t refcount;
100 : :
101 : : union futex_key key;
102 : : };
103 : :
104 : : /**
105 : : * struct futex_q - The hashed futex queue entry, one per waiting task
106 : : * @list: priority-sorted list of tasks waiting on this futex
107 : : * @task: the task waiting on the futex
108 : : * @lock_ptr: the hash bucket lock
109 : : * @key: the key the futex is hashed on
110 : : * @pi_state: optional priority inheritance state
111 : : * @rt_waiter: rt_waiter storage for use with requeue_pi
112 : : * @requeue_pi_key: the requeue_pi target futex key
113 : : * @bitset: bitset for the optional bitmasked wakeup
114 : : *
115 : : * We use this hashed waitqueue, instead of a normal wait_queue_t, so
116 : : * we can wake only the relevant ones (hashed queues may be shared).
117 : : *
118 : : * A futex_q has a woken state, just like tasks have TASK_RUNNING.
119 : : * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
120 : : * The order of wakeup is always to make the first condition true, then
121 : : * the second.
122 : : *
123 : : * PI futexes are typically woken before they are removed from the hash list via
124 : : * the rt_mutex code. See unqueue_me_pi().
125 : : */
126 : : struct futex_q {
127 : : struct plist_node list;
128 : :
129 : : struct task_struct *task;
130 : : spinlock_t *lock_ptr;
131 : : union futex_key key;
132 : : struct futex_pi_state *pi_state;
133 : : struct rt_mutex_waiter *rt_waiter;
134 : : union futex_key *requeue_pi_key;
135 : : u32 bitset;
136 : : };
137 : :
138 : : static const struct futex_q futex_q_init = {
139 : : /* list gets initialized in queue_me()*/
140 : : .key = FUTEX_KEY_INIT,
141 : : .bitset = FUTEX_BITSET_MATCH_ANY
142 : : };
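A minimal sketch of the "woken" test described in the struct futex_q comment above; the helper is hypothetical, and the real checks live in the wake/unqueue paths below:

/* Illustration only, not part of this file. */
static inline bool example_futex_q_is_woken(struct futex_q *q)
{
	/* Wakers plist_del() the entry first, then NULL out lock_ptr. */
	return plist_node_empty(&q->list) || q->lock_ptr == NULL;
}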
143 : :
144 : : /*
145 : : * Hash buckets are shared by all the futex_keys that hash to the same
146 : : * location. Each key may have multiple futex_q structures, one for each task
147 : : * waiting on a futex.
148 : : */
149 : : struct futex_hash_bucket {
150 : : spinlock_t lock;
151 : : struct plist_head chain;
152 : : };
153 : :
154 : : static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
155 : :
156 : : /*
157 : : * We hash on the keys returned from get_futex_key (see below).
158 : : */
159 : 0 : static struct futex_hash_bucket *hash_futex(union futex_key *key)
160 : : {
161 : 23817706 : u32 hash = jhash2((u32*)&key->both.word,
162 : : (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
163 : 11908853 : key->both.offset);
164 : 11908853 : return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
165 : : }
166 : :
167 : : /*
168 : : * Return 1 if two futex_keys are equal, 0 otherwise.
169 : : */
170 : : static inline int match_futex(union futex_key *key1, union futex_key *key2)
171 : : {
172 : 0 : return (key1 && key2
173 [ # # ][ # # ][ + + ][ # # ][ # # ][ + - ][ # # ][ + + ][ # # ][ # # ]: 927425 : && key1->both.word == key2->both.word
174 [ # # ][ # # ][ + - ][ # # ][ # # ][ + - ][ # # ][ + ][ # # ][ # # ]: 925286 : && key1->both.ptr == key2->both.ptr
175 [ # # ][ # # ][ # # ][ + - ][ + + ][ # # ][ # # ][ # # ][ # # ][ + - ][ - + ][ # # ][ # # ][ + ][ + + ][ # # ][ # # ][ # # ][ # # ]: 1854846 : && key1->both.offset == key2->both.offset);
176 : : }
177 : :
178 : : /*
179 : : * Take a reference to the resource addressed by a key.
180 : : * Can be called while holding spinlocks.
181 : : *
182 : : */
183 : 11908220 : static void get_futex_key_refs(union futex_key *key)
184 : : {
185 [ + + ]: 11908220 : if (!key->both.ptr)
186 : 1 : return;
187 : :
188 [ - + + ]: 11907408 : switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
189 : : case FUT_OFF_INODE:
190 : 0 : ihold(key->shared.inode);
191 : : break;
192 : : case FUT_OFF_MMSHARED:
193 : 3387 : atomic_inc(&key->private.mm->mm_count);
194 : : break;
195 : : }
196 : : }
197 : :
198 : : /*
199 : : * Drop a reference to the resource addressed by a key.
200 : : * The hash bucket spinlock must not be held.
201 : : */
202 : 0 : static void drop_futex_key_refs(union futex_key *key)
203 : : {
204 [ - + ]: 11924903 : if (!key->both.ptr) {
205 : : /* If we're here then we tried to put a key we failed to get */
206 [ # # ][ # # ]: 0 : WARN_ON_ONCE(1);
207 : 11924903 : return;
208 : : }
209 : :
210 [ - + + ]: 11924903 : switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
211 : : case FUT_OFF_INODE:
212 : 0 : iput(key->shared.inode);
213 : : break;
214 : : case FUT_OFF_MMSHARED:
215 : : mmdrop(key->private.mm);
216 : : break;
217 : : }
218 : : }
219 : :
220 : : /**
221 : : * get_futex_key() - Get parameters which are the keys for a futex
222 : : * @uaddr: virtual address of the futex
223 : : * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
224 : : * @key: address where result is stored.
225 : : * @rw: mapping needs to be read/write (values: VERIFY_READ,
226 : : * VERIFY_WRITE)
227 : : *
228 : : * Return: a negative error code or 0
229 : : *
230 : : * The key words are stored in *key on success.
231 : : *
232 : : * For shared mappings, it's (page->index, file_inode(vma->vm_file),
233 : : * offset_within_page). For private mappings, it's (uaddr, current->mm).
234 : : * We can usually work out the index without swapping in the page.
235 : : *
236 : : * lock_page() might sleep, the caller should not hold a spinlock.
237 : : */
238 : : static int
239 : 0 : get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
240 : : {
241 : 11744270 : unsigned long address = (unsigned long)uaddr;
242 : 11744270 : struct mm_struct *mm = current->mm;
243 : : struct page *page, *page_head;
244 : : int err, ro = 0;
245 : :
246 : : /*
247 : : * The futex address must be "naturally" aligned.
248 : : */
249 : 11744270 : key->both.offset = address % PAGE_SIZE;
250 [ + ]: 11744270 : if (unlikely((address % sizeof(u32)) != 0))
251 : : return -EINVAL;
252 : 11826333 : address -= key->both.offset;
253 : :
254 [ + ]: 11826333 : if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
255 : : return -EFAULT;
256 : :
257 : : /*
258 : : * PROCESS_PRIVATE futexes are fast.
259 : : * As the mm cannot disappear under us and the 'key' only needs
260 : : * virtual address, we don't even have to find the underlying vma.
261 : : * Note : We do have to check 'uaddr' is a valid user address,
262 : : * but access_ok() should be faster than find_vma()
263 : : */
264 [ + + ]: 11828322 : if (!fshared) {
265 : 11824934 : key->private.mm = mm;
266 : 11824934 : key->private.address = address;
267 : 11824934 : get_futex_key_refs(key);
268 : 11910412 : return 0;
269 : : }
270 : :
271 : : again:
272 : 3388 : err = get_user_pages_fast(address, 1, 1, &page);
273 : : /*
274 : : * If write access is not required (eg. FUTEX_WAIT), try
275 : : * and get read-only access.
276 : : */
277 [ - + ]: 3388 : if (err == -EFAULT && rw == VERIFY_READ) {
278 : 0 : err = get_user_pages_fast(address, 1, 0, &page);
279 : : ro = 1;
280 : : }
281 [ + - ]: 3388 : if (err < 0)
282 : : return err;
283 : : else
284 : : err = 0;
285 : :
286 : : #ifdef CONFIG_TRANSPARENT_HUGEPAGE
287 : : page_head = page;
288 : : if (unlikely(PageTail(page))) {
289 : : put_page(page);
290 : : /* serialize against __split_huge_page_splitting() */
291 : : local_irq_disable();
292 : : if (likely(__get_user_pages_fast(address, 1, !ro, &page) == 1)) {
293 : : page_head = compound_head(page);
294 : : /*
295 : : * page_head is valid pointer but we must pin
296 : : * it before taking the PG_lock and/or
297 : : * PG_compound_lock. The moment we re-enable
298 : : * irqs __split_huge_page_splitting() can
299 : : * return and the head page can be freed from
300 : : * under us. We can't take the PG_lock and/or
301 : : * PG_compound_lock on a page that could be
302 : : * freed from under us.
303 : : */
304 : : if (page != page_head) {
305 : : get_page(page_head);
306 : : put_page(page);
307 : : }
308 : : local_irq_enable();
309 : : } else {
310 : : local_irq_enable();
311 : : goto again;
312 : : }
313 : : }
314 : : #else
315 : 3388 : page_head = compound_head(page);
316 [ - + ]: 3388 : if (page != page_head) {
317 : : get_page(page_head);
318 : 0 : put_page(page);
319 : : }
320 : : #endif
321 : :
322 : : lock_page(page_head);
323 : :
324 : : /*
325 : : * If page_head->mapping is NULL, then it cannot be a PageAnon
326 : : * page; but it might be the ZERO_PAGE or in the gate area or
327 : : * in a special mapping (all cases which we are happy to fail);
328 : : * or it may have been a good file page when get_user_pages_fast
329 : : * found it, but truncated or holepunched or subjected to
330 : : * invalidate_complete_page2 before we got the page lock (also
331 : : * cases which we are happy to fail). And we hold a reference,
332 : : * so refcount care in invalidate_complete_page's remove_mapping
333 : : * prevents drop_caches from setting mapping to NULL beneath us.
334 : : *
335 : : * The case we do have to guard against is when memory pressure made
336 : : * shmem_writepage move it from filecache to swapcache beneath us:
337 : : * an unlikely race, but we do need to retry for page_head->mapping.
338 : : */
339 [ - + ]: 11747658 : if (!page_head->mapping) {
340 : : int shmem_swizzled = PageSwapCache(page_head);
341 : 0 : unlock_page(page_head);
342 : 0 : put_page(page_head);
343 [ # # ]: 0 : if (shmem_swizzled)
344 : : goto again;
345 : : return -EFAULT;
346 : : }
347 : :
348 : : /*
349 : : * Private mappings are handled in a simple way.
350 : : *
351 : : * NOTE: When userspace waits on a MAP_SHARED mapping, even if
352 : : * it's a read-only handle, it's expected that futexes attach to
353 : : * the object not the particular process.
354 : : */
355 [ + - ]: 3388 : if (PageAnon(page_head)) {
356 : : /*
357 : : * A RO anonymous page will never change and thus doesn't make
358 : : * sense for futex operations.
359 : : */
360 [ + - ]: 3388 : if (ro) {
361 : : err = -EFAULT;
362 : : goto out;
363 : : }
364 : :
365 : 3388 : key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
366 : 3388 : key->private.mm = mm;
367 : 3388 : key->private.address = address;
368 : : } else {
369 : 0 : key->both.offset |= FUT_OFF_INODE; /* inode-based key */
370 : 0 : key->shared.inode = page_head->mapping->host;
371 : 0 : key->shared.pgoff = basepage_index(page);
372 : : }
373 : :
374 : 3388 : get_futex_key_refs(key);
375 : :
376 : : out:
377 : 3386 : unlock_page(page_head);
378 : 3387 : put_page(page_head);
379 : 3388 : return err;
380 : : }
381 : :
382 : : static inline void put_futex_key(union futex_key *key)
383 : : {
384 : 10391875 : drop_futex_key_refs(key);
385 : : }
386 : :
387 : : /**
388 : : * fault_in_user_writeable() - Fault in user address and verify RW access
389 : : * @uaddr: pointer to faulting user space address
390 : : *
391 : : * Slow path to fixup the fault we just took in the atomic write
392 : : * access to @uaddr.
393 : : *
394 : : * We have no generic implementation of a non-destructive write to the
395 : : * user address. We know that we faulted in the atomic pagefault
396 : : * disabled section so we can as well avoid the #PF overhead by
397 : : * calling get_user_pages() right away.
398 : : */
399 : 0 : static int fault_in_user_writeable(u32 __user *uaddr)
400 : : {
401 : 0 : struct mm_struct *mm = current->mm;
402 : : int ret;
403 : :
404 : 0 : down_read(&mm->mmap_sem);
405 : 0 : ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
406 : : FAULT_FLAG_WRITE);
407 : 0 : up_read(&mm->mmap_sem);
408 : :
409 : 0 : return ret < 0 ? ret : 0;
410 : : }
411 : :
412 : : /**
413 : : * futex_top_waiter() - Return the highest priority waiter on a futex
414 : : * @hb: the hash bucket the futex_q's reside in
415 : : * @key: the futex key (to distinguish it from other futex futex_q's)
416 : : *
417 : : * Must be called with the hb lock held.
418 : : */
419 : 0 : static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
420 : : union futex_key *key)
421 : : {
422 : : struct futex_q *this;
423 : :
424 [ # # ]: 0 : plist_for_each_entry(this, &hb->chain, list) {
425 [ # # ]: 0 : if (match_futex(&this->key, key))
426 : : return this;
427 : : }
428 : : return NULL;
429 : : }
430 : :
431 : 0 : static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
432 : : u32 uval, u32 newval)
433 : : {
434 : : int ret;
435 : :
436 : : pagefault_disable();
437 : : ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
438 : : pagefault_enable();
439 : :
440 : 0 : return ret;
441 : : }
442 : :
443 : 0 : static int get_futex_value_locked(u32 *dest, u32 __user *from)
444 : : {
445 : : int ret;
446 : :
447 : : pagefault_disable();
448 : 3570234 : ret = __copy_from_user_inatomic(dest, from, sizeof(u32));
449 : : pagefault_enable();
450 : :
451 [ + ]: 3570281 : return ret ? -EFAULT : 0;
452 : : }
453 : :
454 : :
455 : : /*
456 : : * PI code:
457 : : */
458 : 0 : static int refill_pi_state_cache(void)
459 : : {
460 : : struct futex_pi_state *pi_state;
461 : :
462 [ # # ]: 0 : if (likely(current->pi_state_cache))
463 : : return 0;
464 : :
465 : : pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
466 : :
467 [ # # ]: 0 : if (!pi_state)
468 : : return -ENOMEM;
469 : :
470 : 0 : INIT_LIST_HEAD(&pi_state->list);
471 : : /* pi_mutex gets initialized later */
472 : 0 : pi_state->owner = NULL;
473 : 0 : atomic_set(&pi_state->refcount, 1);
474 : 0 : pi_state->key = FUTEX_KEY_INIT;
475 : :
476 : 0 : current->pi_state_cache = pi_state;
477 : :
478 : 0 : return 0;
479 : : }
480 : :
481 : 0 : static struct futex_pi_state * alloc_pi_state(void)
482 : : {
483 : 0 : struct futex_pi_state *pi_state = current->pi_state_cache;
484 : :
485 [ # # ]: 0 : WARN_ON(!pi_state);
486 : 0 : current->pi_state_cache = NULL;
487 : :
488 : 0 : return pi_state;
489 : : }
490 : :
491 : 0 : static void free_pi_state(struct futex_pi_state *pi_state)
492 : : {
493 [ # # ]: 0 : if (!atomic_dec_and_test(&pi_state->refcount))
494 : 0 : return;
495 : :
496 : : /*
497 : : * If pi_state->owner is NULL, the owner is most probably dying
498 : : * and has cleaned up the pi_state already
499 : : */
500 [ # # ]: 0 : if (pi_state->owner) {
501 : 0 : raw_spin_lock_irq(&pi_state->owner->pi_lock);
502 : 0 : list_del_init(&pi_state->list);
503 : 0 : raw_spin_unlock_irq(&pi_state->owner->pi_lock);
504 : :
505 : 0 : rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
506 : : }
507 : :
508 [ # # ]: 0 : if (current->pi_state_cache)
509 : 0 : kfree(pi_state);
510 : : else {
511 : : /*
512 : : * pi_state->list is already empty.
513 : : * clear pi_state->owner.
514 : : * refcount is at 0 - put it back to 1.
515 : : */
516 : 0 : pi_state->owner = NULL;
517 : 0 : atomic_set(&pi_state->refcount, 1);
518 : 0 : current->pi_state_cache = pi_state;
519 : : }
520 : : }
521 : :
522 : : /*
523 : : * Look up the task based on what TID userspace gave us.
524 : : * We don't trust it.
525 : : */
526 : 0 : static struct task_struct * futex_find_get_task(pid_t pid)
527 : : {
528 : : struct task_struct *p;
529 : :
530 : : rcu_read_lock();
531 : 0 : p = find_task_by_vpid(pid);
532 [ # # ]: 0 : if (p)
533 : 0 : get_task_struct(p);
534 : :
535 : : rcu_read_unlock();
536 : :
537 : 0 : return p;
538 : : }
539 : :
540 : : /*
541 : : * This task is holding PI mutexes at exit time => bad.
542 : : * Kernel cleans up PI-state, but userspace is likely hosed.
543 : : * (Robust-futex cleanup is separate and might save the day for userspace.)
544 : : */
545 : 0 : void exit_pi_state_list(struct task_struct *curr)
546 : : {
547 : 0 : struct list_head *next, *head = &curr->pi_state_list;
548 : : struct futex_pi_state *pi_state;
549 : : struct futex_hash_bucket *hb;
550 : 0 : union futex_key key = FUTEX_KEY_INIT;
551 : :
552 [ # # ]: 0 : if (!futex_cmpxchg_enabled)
553 : 0 : return;
554 : : /*
555 : : * We are a ZOMBIE and nobody can enqueue itself on
556 : : * pi_state_list anymore, but we have to be careful
557 : : * versus waiters unqueueing themselves:
558 : : */
559 : 0 : raw_spin_lock_irq(&curr->pi_lock);
560 [ # # ]: 0 : while (!list_empty(head)) {
561 : :
562 : : next = head->next;
563 : : pi_state = list_entry(next, struct futex_pi_state, list);
564 : 0 : key = pi_state->key;
565 : 0 : hb = hash_futex(&key);
566 : : raw_spin_unlock_irq(&curr->pi_lock);
567 : :
568 : : spin_lock(&hb->lock);
569 : :
570 : 0 : raw_spin_lock_irq(&curr->pi_lock);
571 : : /*
572 : : * We dropped the pi-lock, so re-check whether this
573 : : * task still owns the PI-state:
574 : : */
575 [ # # ]: 0 : if (head->next != next) {
576 : : spin_unlock(&hb->lock);
577 : 0 : continue;
578 : : }
579 : :
580 [ # # ]: 0 : WARN_ON(pi_state->owner != curr);
581 [ # # ]: 0 : WARN_ON(list_empty(&pi_state->list));
582 : : list_del_init(&pi_state->list);
583 : 0 : pi_state->owner = NULL;
584 : : raw_spin_unlock_irq(&curr->pi_lock);
585 : :
586 : 0 : rt_mutex_unlock(&pi_state->pi_mutex);
587 : :
588 : : spin_unlock(&hb->lock);
589 : :
590 : 0 : raw_spin_lock_irq(&curr->pi_lock);
591 : : }
592 : : raw_spin_unlock_irq(&curr->pi_lock);
593 : : }
594 : :
595 : : static int
596 : 0 : lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
597 : : union futex_key *key, struct futex_pi_state **ps)
598 : : {
599 : : struct futex_pi_state *pi_state = NULL;
600 : : struct futex_q *this, *next;
601 : : struct plist_head *head;
602 : : struct task_struct *p;
603 : 0 : pid_t pid = uval & FUTEX_TID_MASK;
604 : :
605 : : head = &hb->chain;
606 : :
607 [ # # ]: 0 : plist_for_each_entry_safe(this, next, head, list) {
608 [ # # ]: 0 : if (match_futex(&this->key, key)) {
609 : : /*
610 : : * Another waiter already exists - bump up
611 : : * the refcount and return its pi_state:
612 : : */
613 : 0 : pi_state = this->pi_state;
614 : : /*
615 : : * Userspace might have messed up non-PI and PI futexes
616 : : */
617 [ # # ]: 0 : if (unlikely(!pi_state))
618 : : return -EINVAL;
619 : :
620 [ # # ]: 0 : WARN_ON(!atomic_read(&pi_state->refcount));
621 : :
622 : : /*
623 : : * When pi_state->owner is NULL then the owner died
624 : : * and another waiter is on the fly. pi_state->owner
625 : : * is fixed up by the task which acquires
626 : : * pi_state->rt_mutex.
627 : : *
628 : : * We do not check for pid == 0 which can happen when
629 : : * the owner died and robust_list_exit() cleared the
630 : : * TID.
631 : : */
632 [ # # ][ # # ]: 0 : if (pid && pi_state->owner) {
633 : : /*
634 : : * Bail out if user space manipulated the
635 : : * futex value.
636 : : */
637 [ # # ]: 0 : if (pid != task_pid_vnr(pi_state->owner))
638 : : return -EINVAL;
639 : : }
640 : :
641 : 0 : atomic_inc(&pi_state->refcount);
642 : 0 : *ps = pi_state;
643 : :
644 : 0 : return 0;
645 : : }
646 : : }
647 : :
648 : : /*
649 : : * We are the first waiter - try to look up the real owner and attach
650 : : * the new pi_state to it, but bail out when TID = 0
651 : : */
652 [ # # ]: 0 : if (!pid)
653 : : return -ESRCH;
654 : 0 : p = futex_find_get_task(pid);
655 [ # # ]: 0 : if (!p)
656 : : return -ESRCH;
657 : :
658 : : /*
659 : : * We need to look at the task state flags to figure out
660 : : * whether the task is exiting. To protect against the do_exit
661 : : * change of the task flags, we do this protected by
662 : : * p->pi_lock:
663 : : */
664 : 0 : raw_spin_lock_irq(&p->pi_lock);
665 [ # # ]: 0 : if (unlikely(p->flags & PF_EXITING)) {
666 : : /*
667 : : * The task is on the way out. When PF_EXITPIDONE is
668 : : * set, we know that the task has finished the
669 : : * cleanup:
670 : : */
671 [ # # ]: 0 : int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
672 : :
673 : : raw_spin_unlock_irq(&p->pi_lock);
674 : : put_task_struct(p);
675 : 0 : return ret;
676 : : }
677 : :
678 : 0 : pi_state = alloc_pi_state();
679 : :
680 : : /*
681 : : * Initialize the pi_mutex in locked state and make 'p'
682 : : * the owner of it:
683 : : */
684 : 0 : rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
685 : :
686 : : /* Store the key for possible exit cleanups: */
687 : 0 : pi_state->key = *key;
688 : :
689 [ # # ]: 0 : WARN_ON(!list_empty(&pi_state->list));
690 : 0 : list_add(&pi_state->list, &p->pi_state_list);
691 : 0 : pi_state->owner = p;
692 : : raw_spin_unlock_irq(&p->pi_lock);
693 : :
694 : : put_task_struct(p);
695 : :
696 : 0 : *ps = pi_state;
697 : :
698 : 0 : return 0;
699 : : }
700 : :
701 : : /**
702 : : * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
703 : : * @uaddr: the pi futex user address
704 : : * @hb: the pi futex hash bucket
705 : : * @key: the futex key associated with uaddr and hb
706 : : * @ps: the pi_state pointer where we store the result of the
707 : : * lookup
708 : : * @task: the task to perform the atomic lock work for. This will
709 : : * be "current" except in the case of requeue pi.
710 : : * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
711 : : *
712 : : * Return:
713 : : * 0 - ready to wait;
714 : : * 1 - acquired the lock;
715 : : * <0 - error
716 : : *
717 : : * The hb->lock and futex_key refs shall be held by the caller.
718 : : */
719 : 0 : static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
720 : : union futex_key *key,
721 : : struct futex_pi_state **ps,
722 : : struct task_struct *task, int set_waiters)
723 : : {
724 : : int lock_taken, ret, force_take = 0;
725 : 0 : u32 uval, newval, curval, vpid = task_pid_vnr(task);
726 : :
727 : : retry:
728 : : ret = lock_taken = 0;
729 : :
730 : : /*
731 : : * To avoid races, we attempt to take the lock here again
732 : : * (by doing a 0 -> TID atomic cmpxchg), while holding all
733 : : * the locks. It will most likely not succeed.
734 : : */
735 : : newval = vpid;
736 [ # # ]: 0 : if (set_waiters)
737 : 0 : newval |= FUTEX_WAITERS;
738 : :
739 [ # # ]: 0 : if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
740 : : return -EFAULT;
741 : :
742 : : /*
743 : : * Detect deadlocks.
744 : : */
745 [ # # ]: 0 : if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
746 : : return -EDEADLK;
747 : :
748 : : /*
749 : : * Surprise - we got the lock. Just return to userspace:
750 : : */
751 [ # # ]: 0 : if (unlikely(!curval))
752 : : return 1;
753 : :
754 : : uval = curval;
755 : :
756 : : /*
757 : : * Set the FUTEX_WAITERS flag, so the owner will know it has someone
758 : : * to wake at the next unlock.
759 : : */
760 : 0 : newval = curval | FUTEX_WAITERS;
761 : :
762 : : /*
763 : : * Should we force take the futex? See below.
764 : : */
765 [ # # ]: 0 : if (unlikely(force_take)) {
766 : : /*
767 : : * Keep the OWNER_DIED and the WAITERS bit and set the
768 : : * new TID value.
769 : : */
770 : 0 : newval = (curval & ~FUTEX_TID_MASK) | vpid;
771 : : force_take = 0;
772 : : lock_taken = 1;
773 : : }
774 : :
775 [ # # ]: 0 : if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
776 : : return -EFAULT;
777 [ # # ]: 0 : if (unlikely(curval != uval))
778 : : goto retry;
779 : :
780 : : /*
781 : : * We took the lock due to forced take over.
782 : : */
783 [ # # ]: 0 : if (unlikely(lock_taken))
784 : : return 1;
785 : :
786 : : /*
787 : : * We don't have the lock. Look up the PI state (or create it if
788 : : * we are the first waiter):
789 : : */
790 : 0 : ret = lookup_pi_state(uval, hb, key, ps);
791 : :
792 [ # # ]: 0 : if (unlikely(ret)) {
793 [ # # ]: 0 : switch (ret) {
794 : : case -ESRCH:
795 : : /*
796 : : * We failed to find an owner for this
797 : : * futex. So we have no pi_state to block
798 : : * on. This can happen in two cases:
799 : : *
800 : : * 1) The owner died
801 : : * 2) A stale FUTEX_WAITERS bit
802 : : *
803 : : * Re-read the futex value.
804 : : */
805 [ # # ]: 0 : if (get_futex_value_locked(&curval, uaddr))
806 : : return -EFAULT;
807 : :
808 : : /*
809 : : * If the owner died or we have a stale
810 : : * WAITERS bit the owner TID in the user space
811 : : * futex is 0.
812 : : */
813 [ # # ]: 0 : if (!(curval & FUTEX_TID_MASK)) {
814 : : force_take = 1;
815 : : goto retry;
816 : : }
817 : : default:
818 : : break;
819 : : }
820 : : }
821 : :
822 : 0 : return ret;
823 : : }
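For context, a userspace PI mutex built on this path takes the uncontended lock with a single cmpxchg of 0 to the caller's TID and only enters the kernel on contention. A minimal userspace sketch, assuming the raw futex syscall and the uapi constants (the helper name is hypothetical):

/* Userspace illustration only. */
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>

static void example_pi_lock(uint32_t *futex_word, uint32_t my_tid)
{
	uint32_t expected = 0;

	/* Fast path: 0 -> TID, no kernel involvement. */
	if (__atomic_compare_exchange_n(futex_word, &expected, my_tid, 0,
					__ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
		return;

	/* Contended: the kernel sets FUTEX_WAITERS and applies PI boosting. */
	syscall(SYS_futex, futex_word, FUTEX_LOCK_PI_PRIVATE, 0, NULL, NULL, 0);
}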
824 : :
825 : : /**
826 : : * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
827 : : * @q: The futex_q to unqueue
828 : : *
829 : : * The q->lock_ptr must not be NULL and must be held by the caller.
830 : : */
831 : 0 : static void __unqueue_futex(struct futex_q *q)
832 : : {
833 : : struct futex_hash_bucket *hb;
834 : :
835 [ + ][ - + ][ # # ][ + - ]: 1025147 : if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
836 [ - + ][ + - ]: 1025154 : || WARN_ON(plist_node_empty(&q->list)))
837 : 1025162 : return;
838 : :
839 : 1025151 : hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
840 : 1025151 : plist_del(&q->list, &hb->chain);
841 : : }
842 : :
843 : : /*
844 : : * The hash bucket lock must be held when this is called.
845 : : * Afterwards, the futex_q must not be accessed.
846 : : */
847 : 0 : static void wake_futex(struct futex_q *q)
848 : : {
849 : 925134 : struct task_struct *p = q->task;
850 : :
851 [ + + ][ + ][ - + ][ + - ]: 925134 : if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
852 : 925175 : return;
853 : :
854 : : /*
855 : : * We set q->lock_ptr = NULL _before_ we wake up the task. If
856 : : * a non-futex wake up happens on another CPU then the task
857 : : * might exit and p would dereference a non-existing task
858 : : * struct. Prevent this by holding a reference on p across the
859 : : * wake up.
860 : : */
861 : 925149 : get_task_struct(p);
862 : :
863 : 925176 : __unqueue_futex(q);
864 : : /*
865 : : * The waiting task can free the futex_q as soon as
866 : : * q->lock_ptr = NULL is written, without taking any locks. A
867 : : * memory barrier is required here to prevent the following
868 : : * store to lock_ptr from getting ahead of the plist_del.
869 : : */
870 : 925141 : smp_wmb();
871 : 925053 : q->lock_ptr = NULL;
872 : :
873 : 925053 : wake_up_state(p, TASK_NORMAL);
874 : : put_task_struct(p);
875 : : }
876 : :
877 : 0 : static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
878 : : {
879 : : struct task_struct *new_owner;
880 : 0 : struct futex_pi_state *pi_state = this->pi_state;
881 : : u32 uninitialized_var(curval), newval;
882 : :
883 [ # # ]: 0 : if (!pi_state)
884 : : return -EINVAL;
885 : :
886 : : /*
887 : : * If current does not own the pi_state then the futex is
888 : : * inconsistent and user space fiddled with the futex value.
889 : : */
890 [ # # ]: 0 : if (pi_state->owner != current)
891 : : return -EINVAL;
892 : :
893 : 0 : raw_spin_lock(&pi_state->pi_mutex.wait_lock);
894 : 0 : new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
895 : :
896 : : /*
897 : : * It is possible that the next waiter (the one that brought
898 : : * this owner to the kernel) timed out and is no longer
899 : : * waiting on the lock.
900 : : */
901 [ # # ]: 0 : if (!new_owner)
902 : 0 : new_owner = this->task;
903 : :
904 : : /*
905 : : * We pass it to the next owner. (The WAITERS bit is always
906 : : * kept enabled while there is PI state around. We must also
907 : : * preserve the owner died bit.)
908 : : */
909 [ # # ]: 0 : if (!(uval & FUTEX_OWNER_DIED)) {
910 : : int ret = 0;
911 : :
912 : 0 : newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
913 : :
914 [ # # ]: 0 : if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
915 : : ret = -EFAULT;
916 [ # # ]: 0 : else if (curval != uval)
917 : : ret = -EINVAL;
918 [ # # ]: 0 : if (ret) {
919 : : raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
920 : : return ret;
921 : : }
922 : : }
923 : :
924 : 0 : raw_spin_lock_irq(&pi_state->owner->pi_lock);
925 [ # # ]: 0 : WARN_ON(list_empty(&pi_state->list));
926 : : list_del_init(&pi_state->list);
927 : 0 : raw_spin_unlock_irq(&pi_state->owner->pi_lock);
928 : :
929 : 0 : raw_spin_lock_irq(&new_owner->pi_lock);
930 [ # # ]: 0 : WARN_ON(!list_empty(&pi_state->list));
931 : 0 : list_add(&pi_state->list, &new_owner->pi_state_list);
932 : 0 : pi_state->owner = new_owner;
933 : : raw_spin_unlock_irq(&new_owner->pi_lock);
934 : :
935 : : raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
936 : 0 : rt_mutex_unlock(&pi_state->pi_mutex);
937 : :
938 : : return 0;
939 : : }
940 : :
941 : 0 : static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
942 : : {
943 : : u32 uninitialized_var(oldval);
944 : :
945 : : /*
946 : : * There is no waiter, so we unlock the futex. The owner died
947 : : * bit does not have to be preserved here. We are the owner:
948 : : */
949 [ # # ]: 0 : if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
950 : : return -EFAULT;
951 [ # # ]: 0 : if (oldval != uval)
952 : : return -EAGAIN;
953 : :
954 : 0 : return 0;
955 : : }
956 : :
957 : : /*
958 : : * Express the locking dependencies for lockdep:
959 : : */
960 : : static inline void
961 : : double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
962 : : {
963 [ + + ][ + + ]: 508065 : if (hb1 <= hb2) {
964 : : spin_lock(&hb1->lock);
965 [ + - + - ]: 157106 : if (hb1 < hb2)
966 : 157106 : spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
967 : : } else { /* hb1 > hb2 */
968 : : spin_lock(&hb2->lock);
969 : 350959 : spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
970 : : }
971 : : }
972 : :
973 : : static inline void
974 : : double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
975 : : {
976 : : spin_unlock(&hb1->lock);
977 [ # # # # # # + - # # + - ]: 508065 : if (hb1 != hb2)
978 : : spin_unlock(&hb2->lock);
979 : : }
980 : :
981 : : /*
982 : : * Wake up waiters matching bitset queued on this futex (uaddr).
983 : : */
984 : : static int
985 : 0 : futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
986 : : {
987 : : struct futex_hash_bucket *hb;
988 : : struct futex_q *this, *next;
989 : : struct plist_head *head;
990 : 7338715 : union futex_key key = FUTEX_KEY_INIT;
991 : : int ret;
992 : :
993 [ + + ]: 7338715 : if (!bitset)
994 : : return -EINVAL;
995 : :
996 : 7338591 : ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
997 [ + ]: 7337983 : if (unlikely(ret != 0))
998 : : goto out;
999 : :
1000 : 7338011 : hb = hash_futex(&key);
1001 : : spin_lock(&hb->lock);
1002 : : head = &hb->chain;
1003 : :
1004 [ + + ]: 7340854 : plist_for_each_entry_safe(this, next, head, list) {
1005 [ + + ]: 419185 : if (match_futex (&this->key, &key)) {
1006 [ + + ][ + ]: 417108 : if (this->pi_state || this->rt_waiter) {
1007 : : ret = -EINVAL;
1008 : : break;
1009 : : }
1010 : :
1011 : : /* Check if one of the bits is set in both bitsets */
1012 [ - + ]: 417108 : if (!(this->bitset & bitset))
1013 : 0 : continue;
1014 : :
1015 : 417108 : wake_futex(this);
1016 [ + + ]: 417133 : if (++ret >= nr_wake)
1017 : : break;
1018 : : }
1019 : : }
1020 : :
1021 : : spin_unlock(&hb->lock);
1022 : 7338819 : put_futex_key(&key);
1023 : : out:
1024 : 7338813 : return ret;
1025 : : }
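For context, the userspace counterpart of this wake path is the plain wait/wake pair. A minimal sketch using the raw syscall, since glibc provides no futex wrapper (helper names are hypothetical):

/* Userspace illustration only. */
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>

static long example_futex_wait(uint32_t *uaddr, uint32_t expected)
{
	/* Sleeps only while *uaddr still equals expected. */
	return syscall(SYS_futex, uaddr, FUTEX_WAIT_PRIVATE, expected,
		       NULL, NULL, 0);
}

static long example_futex_wake_one(uint32_t *uaddr)
{
	/* Wakes at most one waiter hashed on uaddr (nr_wake = 1). */
	return syscall(SYS_futex, uaddr, FUTEX_WAKE_PRIVATE, 1,
		       NULL, NULL, 0);
}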
1026 : :
1027 : : /*
1028 : : * Wake up all waiters hashed on the physical page that is mapped
1029 : : * to this virtual address:
1030 : : */
1031 : : static int
1032 : 0 : futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
1033 : : int nr_wake, int nr_wake2, int op)
1034 : : {
1035 : 507892 : union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1036 : : struct futex_hash_bucket *hb1, *hb2;
1037 : : struct plist_head *head;
1038 : : struct futex_q *this, *next;
1039 : : int ret, op_ret;
1040 : :
1041 : : retry:
1042 : 507892 : ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1043 [ + - ]: 507892 : if (unlikely(ret != 0))
1044 : : goto out;
1045 : 507892 : ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
1046 [ + - ]: 507892 : if (unlikely(ret != 0))
1047 : : goto out_put_key1;
1048 : :
1049 : 507892 : hb1 = hash_futex(&key1);
1050 : 507892 : hb2 = hash_futex(&key2);
1051 : :
1052 : : retry_private:
1053 : : double_lock_hb(hb1, hb2);
1054 : : op_ret = futex_atomic_op_inuser(op, uaddr2);
1055 [ - + ]: 507892 : if (unlikely(op_ret < 0)) {
1056 : :
1057 : : double_unlock_hb(hb1, hb2);
1058 : :
1059 : : #ifndef CONFIG_MMU
1060 : : /*
1061 : : * we don't get EFAULT from MMU faults if we don't have an MMU,
1062 : : * but we might get them from range checking
1063 : : */
1064 : : ret = op_ret;
1065 : : goto out_put_keys;
1066 : : #endif
1067 : :
1068 [ # # ]: 0 : if (unlikely(op_ret != -EFAULT)) {
1069 : : ret = op_ret;
1070 : : goto out_put_keys;
1071 : : }
1072 : :
1073 : 0 : ret = fault_in_user_writeable(uaddr2);
1074 [ # # ]: 0 : if (ret)
1075 : : goto out_put_keys;
1076 : :
1077 [ # # ]: 0 : if (!(flags & FLAGS_SHARED))
1078 : : goto retry_private;
1079 : :
1080 : 0 : put_futex_key(&key2);
1081 : 0 : put_futex_key(&key1);
1082 : : goto retry;
1083 : : }
1084 : :
1085 : : head = &hb1->chain;
1086 : :
1087 [ + + ]: 507892 : plist_for_each_entry_safe(this, next, head, list) {
1088 [ + - ]: 507880 : if (match_futex (&this->key, &key1)) {
1089 [ + - ][ + - ]: 507880 : if (this->pi_state || this->rt_waiter) {
1090 : : ret = -EINVAL;
1091 : : goto out_unlock;
1092 : : }
1093 : 507880 : wake_futex(this);
1094 [ - + ]: 507880 : if (++ret >= nr_wake)
1095 : : break;
1096 : : }
1097 : : }
1098 : :
1099 [ + + ]: 507892 : if (op_ret > 0) {
1100 : : head = &hb2->chain;
1101 : :
1102 : : op_ret = 0;
1103 [ - + ]: 7 : plist_for_each_entry_safe(this, next, head, list) {
1104 [ # # ]: 0 : if (match_futex (&this->key, &key2)) {
1105 [ # # ][ # # ]: 0 : if (this->pi_state || this->rt_waiter) {
1106 : : ret = -EINVAL;
1107 : : goto out_unlock;
1108 : : }
1109 : 0 : wake_futex(this);
1110 [ # # ]: 0 : if (++op_ret >= nr_wake2)
1111 : : break;
1112 : : }
1113 : : }
1114 : 7 : ret += op_ret;
1115 : : }
1116 : :
1117 : : out_unlock:
1118 : : double_unlock_hb(hb1, hb2);
1119 : : out_put_keys:
1120 : 507892 : put_futex_key(&key2);
1121 : : out_put_key1:
1122 : 507892 : put_futex_key(&key1);
1123 : : out:
1124 : 507892 : return ret;
1125 : : }
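For context, FUTEX_WAKE_OP lets a single syscall modify the second futex word, wake waiters on the first, and conditionally wake waiters on the second, which is what the path above implements. A minimal userspace sketch using the FUTEX_OP() encoding from the uapi header (the helper name is hypothetical):

/* Userspace illustration only. */
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>

static long example_wake_op(uint32_t *uaddr1, uint32_t *uaddr2)
{
	/*
	 * Atomically does *uaddr2 += 1, always wakes one waiter on uaddr1,
	 * and also wakes one waiter on uaddr2 if the old *uaddr2 was > 0.
	 * The second "1" (nr_wake2) travels in the timeout argument slot.
	 */
	return syscall(SYS_futex, uaddr1, FUTEX_WAKE_OP_PRIVATE, 1,
		       (void *)(unsigned long)1, uaddr2,
		       FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_GT, 0));
}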
1126 : :
1127 : : /**
1128 : : * requeue_futex() - Requeue a futex_q from one hb to another
1129 : : * @q: the futex_q to requeue
1130 : : * @hb1: the source hash_bucket
1131 : : * @hb2: the target hash_bucket
1132 : : * @key2: the new key for the requeued futex_q
1133 : : */
1134 : : static inline
1135 : : void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1136 : : struct futex_hash_bucket *hb2, union futex_key *key2)
1137 : : {
1138 : :
1139 : : /*
1140 : : * If key1 and key2 hash to the same bucket, no need to
1141 : : * requeue.
1142 : : */
1143 [ + - ]: 167 : if (likely(&hb1->chain != &hb2->chain)) {
1144 : 167 : plist_del(&q->list, &hb1->chain);
1145 : 167 : plist_add(&q->list, &hb2->chain);
1146 : 167 : q->lock_ptr = &hb2->lock;
1147 : : }
1148 : 167 : get_futex_key_refs(key2);
1149 : 167 : q->key = *key2;
1150 : : }
1151 : :
1152 : : /**
1153 : : * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
1154 : : * @q: the futex_q
1155 : : * @key: the key of the requeue target futex
1156 : : * @hb: the hash_bucket of the requeue target futex
1157 : : *
1158 : : * During futex_requeue, with requeue_pi=1, it is possible to acquire the
1159 : : * target futex if it is uncontended or via a lock steal. Set the futex_q key
1160 : : * to the requeue target futex so the waiter can detect the wakeup on the right
1161 : : * futex, but remove it from the hb and NULL the rt_waiter so it can detect
1162 : : * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
1163 : : * to protect access to the pi_state to fixup the owner later. Must be called
1164 : : * with both q->lock_ptr and hb->lock held.
1165 : : */
1166 : : static inline
1167 : : void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1168 : : struct futex_hash_bucket *hb)
1169 : : {
1170 : 0 : get_futex_key_refs(key);
1171 : 0 : q->key = *key;
1172 : :
1173 : 0 : __unqueue_futex(q);
1174 : :
1175 [ # # ][ # # ]: 0 : WARN_ON(!q->rt_waiter);
1176 : 0 : q->rt_waiter = NULL;
1177 : :
1178 : 0 : q->lock_ptr = &hb->lock;
1179 : :
1180 : 0 : wake_up_state(q->task, TASK_NORMAL);
1181 : : }
1182 : :
1183 : : /**
1184 : : * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
1185 : : * @pifutex: the user address of the to futex
1186 : : * @hb1: the from futex hash bucket, must be locked by the caller
1187 : : * @hb2: the to futex hash bucket, must be locked by the caller
1188 : : * @key1: the from futex key
1189 : : * @key2: the to futex key
1190 : : * @ps: address to store the pi_state pointer
1191 : : * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
1192 : : *
1193 : : * Try and get the lock on behalf of the top waiter if we can do it atomically.
1194 : : * Wake the top waiter if we succeed. If the caller specified set_waiters,
1195 : : * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
1196 : : * hb1 and hb2 must be held by the caller.
1197 : : *
1198 : : * Return:
1199 : : * 0 - failed to acquire the lock atomically;
1200 : : * 1 - acquired the lock;
1201 : : * <0 - error
1202 : : */
1203 : 0 : static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1204 : : struct futex_hash_bucket *hb1,
1205 : : struct futex_hash_bucket *hb2,
1206 : : union futex_key *key1, union futex_key *key2,
1207 : : struct futex_pi_state **ps, int set_waiters)
1208 : : {
1209 : : struct futex_q *top_waiter = NULL;
1210 : : u32 curval;
1211 : : int ret;
1212 : :
1213 [ # # ]: 0 : if (get_futex_value_locked(&curval, pifutex))
1214 : : return -EFAULT;
1215 : :
1216 : : /*
1217 : : * Find the top_waiter and determine if there are additional waiters.
1218 : : * If the caller intends to requeue more than 1 waiter to pifutex,
1219 : : * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
1220 : : * as we have means to handle the possible fault. If not, don't set
1221 : : * the bit unnecessarily as it will force the subsequent unlock to enter
1222 : : * the kernel.
1223 : : */
1224 : 0 : top_waiter = futex_top_waiter(hb1, key1);
1225 : :
1226 : : /* There are no waiters, nothing for us to do. */
1227 [ # # ]: 0 : if (!top_waiter)
1228 : : return 0;
1229 : :
1230 : : /* Ensure we requeue to the expected futex. */
1231 [ # # ]: 0 : if (!match_futex(top_waiter->requeue_pi_key, key2))
1232 : : return -EINVAL;
1233 : :
1234 : : /*
1235 : : * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in
1236 : : * the contended case or if set_waiters is 1. The pi_state is returned
1237 : : * in ps in contended cases.
1238 : : */
1239 : 0 : ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
1240 : : set_waiters);
1241 [ # # ]: 0 : if (ret == 1)
1242 : : requeue_pi_wake_futex(top_waiter, key2, hb2);
1243 : :
1244 : 0 : return ret;
1245 : : }
1246 : :
1247 : : /**
1248 : : * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
1249 : : * @uaddr1: source futex user address
1250 : : * @flags: futex flags (FLAGS_SHARED, etc.)
1251 : : * @uaddr2: target futex user address
1252 : : * @nr_wake: number of waiters to wake (must be 1 for requeue_pi)
1253 : : * @nr_requeue: number of waiters to requeue (0-INT_MAX)
1254 : : * @cmpval: @uaddr1 expected value (or %NULL)
1255 : : * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
1256 : : * pi futex (pi to pi requeue is not supported)
1257 : : *
1258 : : * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
1259 : : * uaddr2 atomically on behalf of the top waiter.
1260 : : *
1261 : : * Return:
1262 : : * >=0 - on success, the number of tasks requeued or woken;
1263 : : * <0 - on error
1264 : : */
1265 : 0 : static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1266 : : u32 __user *uaddr2, int nr_wake, int nr_requeue,
1267 : : u32 *cmpval, int requeue_pi)
1268 : : {
1269 : 173 : union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1270 : : int drop_count = 0, task_count = 0, ret;
1271 : 173 : struct futex_pi_state *pi_state = NULL;
1272 : : struct futex_hash_bucket *hb1, *hb2;
1273 : : struct plist_head *head1;
1274 : : struct futex_q *this, *next;
1275 : : u32 curval2;
1276 : :
1277 [ - + ]: 173 : if (requeue_pi) {
1278 : : /*
1279 : : * requeue_pi requires a pi_state, try to allocate it now
1280 : : * without any locks in case it fails.
1281 : : */
1282 [ # # ]: 0 : if (refill_pi_state_cache())
1283 : : return -ENOMEM;
1284 : : /*
1285 : : * requeue_pi must wake as many tasks as it can, up to nr_wake
1286 : : * + nr_requeue, since it acquires the rt_mutex prior to
1287 : : * returning to userspace, so as to not leave the rt_mutex with
1288 : : * waiters and no owner. However, second and third wake-ups
1289 : : * cannot be predicted as they involve race conditions with the
1290 : : * first wake and a fault while looking up the pi_state. Both
1291 : : * pthread_cond_signal() and pthread_cond_broadcast() should
1292 : : * use nr_wake=1.
1293 : : */
1294 [ # # ]: 173 : if (nr_wake != 1)
1295 : : return -EINVAL;
1296 : : }
1297 : :
1298 : : retry:
1299 [ - + ]: 173 : if (pi_state != NULL) {
1300 : : /*
1301 : : * We will have to lookup the pi_state again, so free this one
1302 : : * to keep the accounting correct.
1303 : : */
1304 : 0 : free_pi_state(pi_state);
1305 : 0 : pi_state = NULL;
1306 : : }
1307 : :
1308 : 173 : ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1309 [ + - ]: 173 : if (unlikely(ret != 0))
1310 : : goto out;
1311 : 173 : ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
1312 : : requeue_pi ? VERIFY_WRITE : VERIFY_READ);
1313 [ + - ]: 173 : if (unlikely(ret != 0))
1314 : : goto out_put_key1;
1315 : :
1316 : 173 : hb1 = hash_futex(&key1);
1317 : 173 : hb2 = hash_futex(&key2);
1318 : :
1319 : : retry_private:
1320 : : double_lock_hb(hb1, hb2);
1321 : :
1322 [ + - ]: 173 : if (likely(cmpval != NULL)) {
1323 : : u32 curval;
1324 : :
1325 : 173 : ret = get_futex_value_locked(&curval, uaddr1);
1326 : :
1327 [ - + ]: 173 : if (unlikely(ret)) {
1328 : : double_unlock_hb(hb1, hb2);
1329 : :
1330 : 0 : ret = get_user(curval, uaddr1);
1331 [ # # ]: 0 : if (ret)
1332 : : goto out_put_keys;
1333 : :
1334 [ # # ]: 0 : if (!(flags & FLAGS_SHARED))
1335 : : goto retry_private;
1336 : :
1337 : : put_futex_key(&key2);
1338 : : put_futex_key(&key1);
1339 : 0 : goto retry;
1340 : : }
1341 [ - + ]: 173 : if (curval != *cmpval) {
1342 : : ret = -EAGAIN;
1343 : 173 : goto out_unlock;
1344 : : }
1345 : : }
1346 : :
1347 [ - + ][ # # ]: 173 : if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
1348 : : /*
1349 : : * Attempt to acquire uaddr2 and wake the top waiter. If we
1350 : : * intend to requeue waiters, force setting the FUTEX_WAITERS
1351 : : * bit. We force this here where we are able to easily handle
1352 : : * faults rather in the requeue loop below.
1353 : : */
1354 : 0 : ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
1355 : : &key2, &pi_state, nr_requeue);
1356 : :
1357 : : /*
1358 : : * At this point the top_waiter has either taken uaddr2 or is
1359 : : * waiting on it. If the former, then the pi_state will not
1360 : : * exist yet, look it up one more time to ensure we have a
1361 : : * reference to it.
1362 : : */
1363 [ # # ]: 0 : if (ret == 1) {
1364 [ # # ]: 0 : WARN_ON(pi_state);
1365 : 0 : drop_count++;
1366 : 0 : task_count++;
1367 : 0 : ret = get_futex_value_locked(&curval2, uaddr2);
1368 [ # # ]: 0 : if (!ret)
1369 : 0 : ret = lookup_pi_state(curval2, hb2, &key2,
1370 : : &pi_state);
1371 : : }
1372 : :
1373 [ # # # # ]: 0 : switch (ret) {
1374 : : case 0:
1375 : : break;
1376 : : case -EFAULT:
1377 : : double_unlock_hb(hb1, hb2);
1378 : : put_futex_key(&key2);
1379 : : put_futex_key(&key1);
1380 : 0 : ret = fault_in_user_writeable(uaddr2);
1381 [ # # ]: 0 : if (!ret)
1382 : : goto retry;
1383 : : goto out;
1384 : : case -EAGAIN:
1385 : : /* The owner was exiting, try again. */
1386 : : double_unlock_hb(hb1, hb2);
1387 : : put_futex_key(&key2);
1388 : : put_futex_key(&key1);
1389 : 0 : cond_resched();
1390 : 0 : goto retry;
1391 : : default:
1392 : : goto out_unlock;
1393 : : }
1394 : : }
1395 : :
1396 : : head1 = &hb1->chain;
1397 [ + + ]: 529 : plist_for_each_entry_safe(this, next, head1, list) {
1398 [ + - ]: 356 : if (task_count - nr_wake >= nr_requeue)
1399 : : break;
1400 : :
1401 [ + + ]: 356 : if (!match_futex(&this->key, &key1))
1402 : 16 : continue;
1403 : :
1404 : : /*
1405 : : * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
1406 : : * be paired with each other and no other futex ops.
1407 : : *
1408 : : * We should never be requeueing a futex_q with a pi_state,
1409 : : * which is awaiting a futex_unlock_pi().
1410 : : */
1411 [ - + ][ # # ][ + - ]: 340 : if ((requeue_pi && !this->rt_waiter) ||
1412 [ + - ][ + - ]: 340 : (!requeue_pi && this->rt_waiter) ||
1413 : 340 : this->pi_state) {
1414 : : ret = -EINVAL;
1415 : : break;
1416 : : }
1417 : :
1418 : : /*
1419 : : * Wake nr_wake waiters. For requeue_pi, if we acquired the
1420 : : * lock, we already woke the top_waiter. If not, it will be
1421 : : * woken by futex_unlock_pi().
1422 : : */
1423 [ + + ][ + - ]: 340 : if (++task_count <= nr_wake && !requeue_pi) {
1424 : 173 : wake_futex(this);
1425 : 173 : continue;
1426 : : }
1427 : :
1428 : : /* Ensure we requeue to the expected futex for requeue_pi. */
1429 [ - + ][ # # ]: 167 : if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
1430 : : ret = -EINVAL;
1431 : : break;
1432 : : }
1433 : :
1434 : : /*
1435 : : * Requeue nr_requeue waiters and possibly one more in the case
1436 : : * of requeue_pi if we couldn't acquire the lock atomically.
1437 : : */
1438 [ - + ]: 167 : if (requeue_pi) {
1439 : : /* Prepare the waiter to take the rt_mutex. */
1440 : 0 : atomic_inc(&pi_state->refcount);
1441 : 0 : this->pi_state = pi_state;
1442 : 0 : ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
1443 : : this->rt_waiter,
1444 : : this->task, 1);
1445 [ # # ]: 0 : if (ret == 1) {
1446 : : /* We got the lock. */
1447 : : requeue_pi_wake_futex(this, &key2, hb2);
1448 : 0 : drop_count++;
1449 : 0 : continue;
1450 [ # # ]: 0 : } else if (ret) {
1451 : : /* -EDEADLK */
1452 : 0 : this->pi_state = NULL;
1453 : 0 : free_pi_state(pi_state);
1454 : 0 : goto out_unlock;
1455 : : }
1456 : : }
1457 : : requeue_futex(this, hb1, hb2, &key2);
1458 : 167 : drop_count++;
1459 : : }
1460 : :
1461 : : out_unlock:
1462 : : double_unlock_hb(hb1, hb2);
1463 : :
1464 : : /*
1465 : : * drop_futex_key_refs() must be called outside the spinlocks. During
1466 : : * the requeue we moved futex_q's from the hash bucket at key1 to the
1467 : : * one at key2 and updated their key pointer. We no longer need to
1468 : : * hold the references to key1.
1469 : : */
1470 [ + + ]: 340 : while (--drop_count >= 0)
1471 : 167 : drop_futex_key_refs(&key1);
1472 : :
1473 : : out_put_keys:
1474 : : put_futex_key(&key2);
1475 : : out_put_key1:
1476 : : put_futex_key(&key1);
1477 : : out:
1478 [ - + ]: 173 : if (pi_state != NULL)
1479 : 0 : free_pi_state(pi_state);
1480 [ + - ]: 173 : return ret ? ret : task_count;
1481 : : }
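For context, the non-PI requeue path above is what pthread_cond_broadcast() style code relies on: wake one waiter and move the rest onto the mutex futex so they do not all stampede. A minimal userspace sketch (the helper name is hypothetical):

/* Userspace illustration only. */
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <limits.h>

static long example_cmp_requeue(uint32_t *cond_futex, uint32_t cond_val,
				uint32_t *mutex_futex)
{
	/*
	 * Wake one waiter on cond_futex and requeue up to INT_MAX others onto
	 * mutex_futex, but only if *cond_futex still equals cond_val;
	 * otherwise the kernel returns -EAGAIN and the caller re-evaluates.
	 */
	return syscall(SYS_futex, cond_futex, FUTEX_CMP_REQUEUE_PRIVATE, 1,
		       (void *)(unsigned long)INT_MAX, mutex_futex, cond_val);
}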
1482 : :
1483 : : /* The key must be already stored in q->key. */
1484 : : static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
1485 : : __acquires(&hb->lock)
1486 : : {
1487 : : struct futex_hash_bucket *hb;
1488 : :
1489 : 3569974 : hb = hash_futex(&q->key);
1490 : 3569795 : q->lock_ptr = &hb->lock;
1491 : :
1492 : : spin_lock(&hb->lock);
1493 : : return hb;
1494 : : }
1495 : :
1496 : : static inline void
1497 : : queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
1498 : : __releases(&hb->lock)
1499 : : {
1500 : : spin_unlock(&hb->lock);
1501 : : }
1502 : :
1503 : : /**
1504 : : * queue_me() - Enqueue the futex_q on the futex_hash_bucket
1505 : : * @q: The futex_q to enqueue
1506 : : * @hb: The destination hash bucket
1507 : : *
1508 : : * The hb->lock must be held by the caller, and is released here. A call to
1509 : : * queue_me() is typically paired with exactly one call to unqueue_me(). The
1510 : : * exceptions involve the PI related operations, which may use unqueue_me_pi()
1511 : : * or nothing if the unqueue is done as part of the wake process and the unqueue
1512 : : * state is implicit in the state of the woken task (see futex_wait_requeue_pi() for
1513 : : * an example).
1514 : : */
1515 : : static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1516 : : __releases(&hb->lock)
1517 : : {
1518 : : int prio;
1519 : :
1520 : : /*
1521 : : * The priority used to register this element is
1522 : : * - either the real thread-priority for the real-time threads
1523 : : * (i.e. threads with a priority lower than MAX_RT_PRIO)
1524 : : * - or MAX_RT_PRIO for non-RT threads.
1525 : : * Thus, all RT-threads are woken first in priority order, and
1526 : : * the others are woken last, in FIFO order.
1527 : : */
1528 : 1025138 : prio = min(current->normal_prio, MAX_RT_PRIO);
1529 : :
1530 : : plist_node_init(&q->list, prio);
1531 : 1025138 : plist_add(&q->list, &hb->chain);
1532 : 1025145 : q->task = current;
1533 : : spin_unlock(&hb->lock);
1534 : : }
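A minimal sketch of how the wait path pairs queue_lock()/queue_me(): read the futex value under hb->lock so a concurrent waker cannot slip in between the check and the enqueue. This is a hypothetical condensation of the futex_wait setup later in this file, not the literal code:

/* Illustration only, not part of this file. */
static int example_wait_setup(u32 __user *uaddr, u32 val, struct futex_q *q)
{
	struct futex_hash_bucket *hb;
	u32 uval;

	hb = queue_lock(q);			/* takes hb->lock */
	if (get_futex_value_locked(&uval, uaddr)) {
		queue_unlock(q, hb);
		return -EFAULT;			/* real code faults the page in and retries */
	}
	if (uval != val) {
		queue_unlock(q, hb);		/* value changed: avoid a lost wakeup */
		return -EWOULDBLOCK;
	}
	queue_me(q, hb);			/* enqueue and drop hb->lock */
	return 0;
}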
1535 : :
1536 : : /**
1537 : : * unqueue_me() - Remove the futex_q from its futex_hash_bucket
1538 : : * @q: The futex_q to unqueue
1539 : : *
1540 : : * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
1541 : : * be paired with exactly one earlier call to queue_me().
1542 : : *
1543 : : * Return:
1544 : : * 1 - if the futex_q was still queued (and we unqueued it);
1545 : : * 0 - if the futex_q was already removed by the waking thread
1546 : : */
1547 : 1024938 : static int unqueue_me(struct futex_q *q)
1548 : : {
1549 : : spinlock_t *lock_ptr;
1550 : : int ret = 0;
1551 : :
1552 : : /* In the common case we don't take the spinlock, which is nice. */
1553 : : retry:
1554 : 1024938 : lock_ptr = q->lock_ptr;
1555 : 1024938 : barrier();
1556 [ + + ]: 1024943 : if (lock_ptr != NULL) {
1557 : : spin_lock(lock_ptr);
1558 : : /*
1559 : : * q->lock_ptr can change between reading it and
1560 : : * spin_lock(), causing us to take the wrong lock. This
1561 : : * corrects the race condition.
1562 : : *
1563 : : * Reasoning goes like this: if we have the wrong lock,
1564 : : * q->lock_ptr must have changed (maybe several times)
1565 : : * between reading it and the spin_lock(). It can
1566 : : * change again after the spin_lock() but only if it was
1567 : : * already changed before the spin_lock(). It cannot,
1568 : : * however, change back to the original value. Therefore
1569 : : * we can detect whether we acquired the correct lock.
1570 : : */
1571 [ - + ]: 100017 : if (unlikely(lock_ptr != q->lock_ptr)) {
1572 : : spin_unlock(lock_ptr);
1573 : : goto retry;
1574 : : }
1575 : 100017 : __unqueue_futex(q);
1576 : :
1577 [ - + ]: 100017 : BUG_ON(q->pi_state);
1578 : :
1579 : : spin_unlock(lock_ptr);
1580 : : ret = 1;
1581 : : }
1582 : :
1583 : 1024943 : drop_futex_key_refs(&q->key);
1584 : 1024994 : return ret;
1585 : : }
1586 : :
1587 : : /*
1588 : : * PI futexes cannot be requeued and must remove themselves from the
1589 : : * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
1590 : : * and dropped here.
1591 : : */
1592 : 0 : static void unqueue_me_pi(struct futex_q *q)
1593 : : __releases(q->lock_ptr)
1594 : : {
1595 : 0 : __unqueue_futex(q);
1596 : :
1597 [ # # ]: 0 : BUG_ON(!q->pi_state);
1598 : 0 : free_pi_state(q->pi_state);
1599 : 0 : q->pi_state = NULL;
1600 : :
1601 : 0 : spin_unlock(q->lock_ptr);
1602 : 0 : }
1603 : :
1604 : : /*
1605 : : * Fixup the pi_state owner with the new owner.
1606 : : *
1607 : : * Must be called with hash bucket lock held and mm->sem held for non
1608 : : * private futexes.
1609 : : */
1610 : 0 : static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1611 : : struct task_struct *newowner)
1612 : : {
1613 : 0 : u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
1614 : 0 : struct futex_pi_state *pi_state = q->pi_state;
1615 : 0 : struct task_struct *oldowner = pi_state->owner;
1616 : : u32 uval, uninitialized_var(curval), newval;
1617 : : int ret;
1618 : :
1619 : : /* Owner died? */
1620 [ # # ]: 0 : if (!pi_state->owner)
1621 : 0 : newtid |= FUTEX_OWNER_DIED;
1622 : :
1623 : : /*
1624 : : * We are here either because we stole the rtmutex from the
1625 : : * previous highest priority waiter or we are the highest priority
1626 : : * waiter but failed to get the rtmutex the first time.
1627 : : * We have to replace the newowner TID in the user space variable.
1628 : : * This must be atomic as we have to preserve the owner died bit here.
1629 : : *
1630 : : * Note: We write the user space value _before_ changing the pi_state
1631 : : * because we can fault here. Imagine swapped out pages or a fork
1632 : : * that marked all the anonymous memory readonly for cow.
1633 : : *
1634 : : * Modifying pi_state _before_ the user space value would
1635 : : * leave the pi_state in an inconsistent state when we fault
1636 : : * here, because we need to drop the hash bucket lock to
1637 : : * handle the fault. This might be observed in the PID check
1638 : : * in lookup_pi_state.
1639 : : */
1640 : : retry:
1641 [ # # ]: 0 : if (get_futex_value_locked(&uval, uaddr))
1642 : : goto handle_fault;
1643 : :
1644 : : while (1) {
1645 : 0 : newval = (uval & FUTEX_OWNER_DIED) | newtid;
1646 : :
1647 [ # # ]: 0 : if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
1648 : : goto handle_fault;
1649 [ # # ]: 0 : if (curval == uval)
1650 : : break;
1651 : 0 : uval = curval;
1652 : : }
1653 : :
1654 : : /*
1655 : : * We fixed up user space. Now we need to fix the pi_state
1656 : : * itself.
1657 : : */
1658 [ # # ]: 0 : if (pi_state->owner != NULL) {
1659 : 0 : raw_spin_lock_irq(&pi_state->owner->pi_lock);
1660 [ # # ]: 0 : WARN_ON(list_empty(&pi_state->list));
1661 : : list_del_init(&pi_state->list);
1662 : 0 : raw_spin_unlock_irq(&pi_state->owner->pi_lock);
1663 : : }
1664 : :
1665 : 0 : pi_state->owner = newowner;
1666 : :
1667 : 0 : raw_spin_lock_irq(&newowner->pi_lock);
1668 [ # # ]: 0 : WARN_ON(!list_empty(&pi_state->list));
1669 : 0 : list_add(&pi_state->list, &newowner->pi_state_list);
1670 : : raw_spin_unlock_irq(&newowner->pi_lock);
1671 : : return 0;
1672 : :
1673 : : /*
1674 : : * To handle the page fault we need to drop the hash bucket
1675 : : * lock here. That gives the other task (either the highest priority
1676 : : * waiter itself or the task which stole the rtmutex) the
1677 : : * chance to try the fixup of the pi_state. So once we are
1678 : : * back from handling the fault we need to check the pi_state
1679 : : * after reacquiring the hash bucket lock and before trying to
1680 : : * do another fixup. When the fixup has been done already we
1681 : : * simply return.
1682 : : */
1683 : : handle_fault:
1684 : 0 : spin_unlock(q->lock_ptr);
1685 : :
1686 : 0 : ret = fault_in_user_writeable(uaddr);
1687 : :
1688 : 0 : spin_lock(q->lock_ptr);
1689 : :
1690 : : /*
1691 : : * Check if someone else fixed it for us:
1692 : : */
1693 [ # # ]: 0 : if (pi_state->owner != oldowner)
1694 : : return 0;
1695 : :
1696 [ # # ]: 0 : if (ret)
1697 : : return ret;
1698 : :
1699 : : goto retry;
1700 : : }
1701 : :
1702 : : static long futex_wait_restart(struct restart_block *restart);
1703 : :
1704 : : /**
1705 : : * fixup_owner() - Post lock pi_state and corner case management
1706 : : * @uaddr: user address of the futex
1707 : : * @q: futex_q (contains pi_state and access to the rt_mutex)
1708 : : * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0)
1709 : : *
1710 : : * After attempting to lock an rt_mutex, this function is called to cleanup
1711 : : * the pi_state owner as well as handle race conditions that may allow us to
1712 : : * acquire the lock. Must be called with the hb lock held.
1713 : : *
1714 : : * Return:
1715 : : * 1 - success, lock taken;
1716 : : * 0 - success, lock not taken;
1717 : : * <0 - on error (-EFAULT)
1718 : : */
1719 : 0 : static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
1720 : : {
1721 : : struct task_struct *owner;
1722 : : int ret = 0;
1723 : :
1724 [ # # ]: 0 : if (locked) {
1725 : : /*
1726 : : * Got the lock. We might not be the anticipated owner if we
1727 : : * did a lock-steal - fix up the PI-state in that case:
1728 : : */
1729 [ # # ]: 0 : if (q->pi_state->owner != current)
1730 : 0 : ret = fixup_pi_state_owner(uaddr, q, current);
1731 : : goto out;
1732 : : }
1733 : :
1734 : : /*
1735 : : * Catch the rare case, where the lock was released when we were on the
1736 : : * way back before we locked the hash bucket.
1737 : : */
1738 [ # # ]: 0 : if (q->pi_state->owner == current) {
1739 : : /*
1740 : : * Try to get the rt_mutex now. This might fail as some other
1741 : : * task acquired the rt_mutex after we removed ourselves from the
1742 : : * rt_mutex waiters list.
1743 : : */
1744 [ # # ]: 0 : if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
1745 : : locked = 1;
1746 : : goto out;
1747 : : }
1748 : :
1749 : : /*
1750 : : * pi_state is incorrect, some other task did a lock steal and
1751 : : * we returned due to timeout or signal without taking the
1752 : : * rt_mutex. Too late.
1753 : : */
1754 : 0 : raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
1755 : 0 : owner = rt_mutex_owner(&q->pi_state->pi_mutex);
1756 [ # # ]: 0 : if (!owner)
1757 : 0 : owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
1758 : 0 : raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
1759 : 0 : ret = fixup_pi_state_owner(uaddr, q, owner);
1760 : 0 : goto out;
1761 : : }
1762 : :
1763 : : /*
1764 : : * Paranoia check. If we did not take the lock, then we should not be
1765 : : * the owner of the rt_mutex.
1766 : : */
1767 [ # # ]: 0 : if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
1768 : 0 : printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
1769 : : "pi-state %p\n", ret,
1770 : : q->pi_state->pi_mutex.owner,
1771 : : q->pi_state->owner);
1772 : :
1773 : : out:
1774 [ # # ]: 0 : return ret ? ret : locked;
1775 : : }
1776 : :
1777 : : /**
1778 : : * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
1779 : : * @hb: the futex hash bucket, must be locked by the caller
1780 : : * @q: the futex_q to queue up on
1781 : : * @timeout: the prepared hrtimer_sleeper, or null for no timeout
1782 : : */
1783 : 0 : static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
1784 : : struct hrtimer_sleeper *timeout)
1785 : : {
1786 : : /*
1787 : : * The task state is guaranteed to be set before another task can
1788 : : * wake it. set_current_state() is implemented using set_mb() and
1789 : : * queue_me() calls spin_unlock() upon completion, both serializing
1790 : : * access to the hash list and forcing another memory barrier.
1791 : : */
1792 : 1025145 : set_current_state(TASK_INTERRUPTIBLE);
1793 : : queue_me(q, hb);
1794 : :
1795 : : /* Arm the timer */
1796 [ + + ]: 1025179 : if (timeout) {
1797 : 812326 : hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
1798 [ + + ]: 406163 : if (!hrtimer_active(&timeout->timer))
1799 : 18 : timeout->task = NULL;
1800 : : }
1801 : :
1802 : : /*
1803 : : * If we have been removed from the hash list, then another task
1804 : : * has tried to wake us, and we can skip the call to schedule().
1805 : : */
1806 [ + ]: 1025179 : if (likely(!plist_node_empty(&q->list))) {
1807 : : /*
1808 : : * If the timer has already expired, current will already be
1809 : : * flagged for rescheduling. Only call schedule if there
1810 : : * is no timeout, or if it has yet to expire.
1811 : : */
1812 [ + + ][ + + ]: 1025195 : if (!timeout || timeout->task)
1813 : : freezable_schedule();
1814 : : }
1815 : 0 : __set_current_state(TASK_RUNNING);
1816 : 0 : }
1817 : :
1818 : : /**
1819 : : * futex_wait_setup() - Prepare to wait on a futex
1820 : : * @uaddr: the futex userspace address
1821 : : * @val: the expected value
1822 : : * @flags: futex flags (FLAGS_SHARED, etc.)
1823 : : * @q: the associated futex_q
1824 : : * @hb: storage for hash_bucket pointer to be returned to caller
1825 : : *
1826 : : * Setup the futex_q and locate the hash_bucket. Get the futex value and
1827 : : * compare it with the expected value. Handle atomic faults internally.
1828 : : * Return with the hb lock held and a q.key reference on success, and unlocked
1829 : : * with no q.key reference on failure.
1830 : : *
1831 : : * Return:
1832 : : * 0 - uaddr contains val and hb has been locked;
1833 : : * <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
1834 : : */
1835 : 3569770 : static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
1836 : : struct futex_q *q, struct futex_hash_bucket **hb)
1837 : : {
1838 : : u32 uval;
1839 : : int ret;
1840 : :
1841 : : /*
1842 : : * Access the page AFTER the hash-bucket is locked.
1843 : : * Order is important:
1844 : : *
1845 : : * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
1846 : : * Userspace waker: if (cond(var)) { var = new; futex_wake(&var); }
1847 : : *
1848 : : * The basic logical guarantee of a futex is that it blocks ONLY
1849 : : * if cond(var) is known to be true at the time of blocking, for
1850 : : * any cond. If we locked the hash-bucket after testing *uaddr, that
1851 : : * would open a race condition where we could block indefinitely with
1852 : : * cond(var) false, which would violate the guarantee.
1853 : : *
1854 : : * On the other hand, we insert q and release the hash-bucket only
1855 : : * after testing *uaddr. This guarantees that futex_wait() will NOT
1856 : : * absorb a wakeup if *uaddr does not match the desired values
1857 : : * while the syscall executes.
1858 : : */
1859 : : retry:
1860 : 3569770 : ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
1861 [ + ]: 3569939 : if (unlikely(ret != 0))
1862 : : return ret;
1863 : :
1864 : : retry_private:
1865 : 3570116 : *hb = queue_lock(q);
1866 : :
1867 : 3570116 : ret = get_futex_value_locked(&uval, uaddr);
1868 : :
1869 [ - + ]: 3570124 : if (ret) {
1870 : 0 : queue_unlock(q, *hb);
1871 : :
1872 : 0 : ret = get_user(uval, uaddr);
1873 [ # # ]: 0 : if (ret)
1874 : : goto out;
1875 : :
1876 [ # # ]: 0 : if (!(flags & FLAGS_SHARED))
1877 : : goto retry_private;
1878 : :
1879 : : put_futex_key(&q->key);
1880 : : goto retry;
1881 : : }
1882 : :
1883 [ + + ]: 3570124 : if (uval != val) {
1884 : 2544991 : queue_unlock(q, *hb);
1885 : : ret = -EWOULDBLOCK;
1886 : : }
1887 : :
1888 : : out:
1889 [ + + ]: 7139892 : if (ret)
1890 : : put_futex_key(&q->key);
1891 : 3570122 : return ret;
1892 : : }
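The ordering argument above corresponds to a simple user-space protocol. A hedged sketch of a matching waiter/waker pair (the futex() wrapper, the variable `var` and its condition are illustrative, not part of this file):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdint.h>

    static long futex(uint32_t *uaddr, int op, uint32_t val)
    {
            return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
    }

    static uint32_t var;                         /* shared condition word */

    static void waiter(void)
    {
            uint32_t val = __atomic_load_n(&var, __ATOMIC_ACQUIRE);

            while (val == 0) {                   /* cond(val): still need to sleep */
                    /* -EWOULDBLOCK just means var changed under us: re-read. */
                    futex(&var, FUTEX_WAIT_PRIVATE, val);
                    val = __atomic_load_n(&var, __ATOMIC_ACQUIRE);
            }
    }

    static void waker(void)
    {
            __atomic_store_n(&var, 1, __ATOMIC_RELEASE);   /* var = new */
            futex(&var, FUTEX_WAKE_PRIVATE, 1);            /* wake one waiter */
    }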
1893 : :
1894 : 0 : static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
1895 : : ktime_t *abs_time, u32 bitset)
1896 : : {
1897 : : struct hrtimer_sleeper timeout, *to = NULL;
1898 : : struct restart_block *restart;
1899 : : struct futex_hash_bucket *hb;
1900 : 3569828 : struct futex_q q = futex_q_init;
1901 : : int ret;
1902 : :
1903 [ + ]: 3569828 : if (!bitset)
1904 : : return -EINVAL;
1905 : 3570024 : q.bitset = bitset;
1906 : :
1907 [ + + ]: 3570024 : if (abs_time) {
1908 : : to = &timeout;
1909 : :
1910 : 406174 : hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
1911 : : CLOCK_REALTIME : CLOCK_MONOTONIC,
1912 : : HRTIMER_MODE_ABS);
1913 : 406174 : hrtimer_init_sleeper(to, current);
1914 : 406174 : hrtimer_set_expires_range_ns(&to->timer, *abs_time,
1915 : 406174 : current->timer_slack_ns);
1916 : : }
1917 : :
1918 : : retry:
1919 : : /*
1920 : : * Prepare to wait on uaddr. On success, holds hb lock and increments
1921 : : * q.key refs.
1922 : : */
1923 : 0 : ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
1924 [ + + ]: 3570123 : if (ret)
1925 : : goto out;
1926 : :
1927 : : /* queue_me and wait for wakeup, timeout, or a signal. */
1928 : 1025141 : futex_wait_queue_me(hb, &q, to);
1929 : :
1930 : : /* If we were woken (and unqueued), we succeeded, whatever. */
1931 : : ret = 0;
1932 : : /* unqueue_me() drops q.key ref */
1933 [ + + ]: 1024822 : if (!unqueue_me(&q))
1934 : : goto out;
1935 : : ret = -ETIMEDOUT;
1936 [ + + ][ + ]: 100017 : if (to && !to->task)
1937 : : goto out;
1938 : :
1939 : : /*
1940 : : * We expect signal_pending(current), but we might be the
1941 : : * victim of a spurious wakeup as well.
1942 : : */
1943 [ + ]: 0 : if (!signal_pending(current))
1944 : : goto retry;
1945 : :
1946 : : ret = -ERESTARTSYS;
1947 [ + + ]: 15 : if (!abs_time)
1948 : : goto out;
1949 : :
1950 : : restart = &current_thread_info()->restart_block;
1951 : 5 : restart->fn = futex_wait_restart;
1952 : 5 : restart->futex.uaddr = uaddr;
1953 : 5 : restart->futex.val = val;
1954 : 5 : restart->futex.time = abs_time->tv64;
1955 : 5 : restart->futex.bitset = bitset;
1956 : 5 : restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
1957 : :
1958 : : ret = -ERESTART_RESTARTBLOCK;
1959 : :
1960 : : out:
1961 [ + + ]: 3570011 : if (to) {
1962 : 406174 : hrtimer_cancel(&to->timer);
1963 : : destroy_hrtimer_on_stack(&to->timer);
1964 : : }
1965 : 3570011 : return ret;
1966 : : }
1967 : :
1968 : :
1969 : 0 : static long futex_wait_restart(struct restart_block *restart)
1970 : : {
1971 : 0 : u32 __user *uaddr = restart->futex.uaddr;
1972 : : ktime_t t, *tp = NULL;
1973 : :
1974 [ # # ]: 0 : if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
1975 : 0 : t.tv64 = restart->futex.time;
1976 : : tp = &t;
1977 : : }
1978 : 0 : restart->fn = do_no_restart_syscall;
1979 : :
1980 : 0 : return (long)futex_wait(uaddr, restart->futex.flags,
1981 : : restart->futex.val, tp, restart->futex.bitset);
1982 : : }
1983 : :
1984 : :
1985 : : /*
1986 : : * Userspace tried a 0 -> TID atomic transition of the futex value
1987 : : * and failed. The kernel side here does the whole locking operation:
1988 : : * if there are waiters then it will block, it does PI, etc. (Due to
1989 : : * races the kernel might see a 0 value of the futex too.)
1990 : : */
1991 : 0 : static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
1992 : : ktime_t *time, int trylock)
1993 : : {
1994 : : struct hrtimer_sleeper timeout, *to = NULL;
1995 : : struct futex_hash_bucket *hb;
1996 : 0 : struct futex_q q = futex_q_init;
1997 : : int res, ret;
1998 : :
1999 [ # # ]: 0 : if (refill_pi_state_cache())
2000 : : return -ENOMEM;
2001 : :
2002 [ # # ]: 0 : if (time) {
2003 : : to = &timeout;
2004 : : hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
2005 : : HRTIMER_MODE_ABS);
2006 : 0 : hrtimer_init_sleeper(to, current);
2007 : : hrtimer_set_expires(&to->timer, *time);
2008 : : }
2009 : :
2010 : : retry:
2011 : 0 : ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
2012 [ # # ]: 0 : if (unlikely(ret != 0))
2013 : : goto out;
2014 : :
2015 : : retry_private:
2016 : : hb = queue_lock(&q);
2017 : :
2018 : 0 : ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
2019 [ # # ]: 0 : if (unlikely(ret)) {
2020 [ # # # # ]: 0 : switch (ret) {
2021 : : case 1:
2022 : : /* We got the lock. */
2023 : : ret = 0;
2024 : : goto out_unlock_put_key;
2025 : : case -EFAULT:
2026 : : goto uaddr_faulted;
2027 : : case -EAGAIN:
2028 : : /*
2029 : : * Task is exiting and we just wait for the
2030 : : * exit to complete.
2031 : : */
2032 : : queue_unlock(&q, hb);
2033 : : put_futex_key(&q.key);
2034 : 0 : cond_resched();
2035 : : goto retry;
2036 : : default:
2037 : : goto out_unlock_put_key;
2038 : : }
2039 : : }
2040 : :
2041 : : /*
2042 : : * Only actually queue now that the atomic ops are done:
2043 : : */
2044 : : queue_me(&q, hb);
2045 : :
2046 [ # # ]: 0 : WARN_ON(!q.pi_state);
2047 : : /*
2048 : : * Block on the PI mutex:
2049 : : */
2050 [ # # ]: 0 : if (!trylock)
2051 : 0 : ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1);
2052 : : else {
2053 : 0 : ret = rt_mutex_trylock(&q.pi_state->pi_mutex);
2054 : : /* Fixup the trylock return value: */
2055 [ # # ]: 0 : ret = ret ? 0 : -EWOULDBLOCK;
2056 : : }
2057 : :
2058 : 0 : spin_lock(q.lock_ptr);
2059 : : /*
2060 : : * Fixup the pi_state owner and possibly acquire the lock if we
2061 : : * haven't already.
2062 : : */
2063 : 0 : res = fixup_owner(uaddr, &q, !ret);
2064 : : /*
2065 : : * If fixup_owner() returned an error, propagate that. If it acquired
2066 : : * the lock, clear our -ETIMEDOUT or -EINTR.
2067 : : */
2068 [ # # ]: 0 : if (res)
2069 : 0 : ret = (res < 0) ? res : 0;
2070 : :
2071 : : /*
2072 : : * If fixup_owner() faulted and was unable to handle the fault, unlock
2073 : : * it and return the fault to userspace.
2074 : : */
2075 [ # # ][ # # ]: 0 : if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
2076 : 0 : rt_mutex_unlock(&q.pi_state->pi_mutex);
2077 : :
2078 : : /* Unqueue and drop the lock */
2079 : 0 : unqueue_me_pi(&q);
2080 : :
2081 : : goto out_put_key;
2082 : :
2083 : : out_unlock_put_key:
2084 : : queue_unlock(&q, hb);
2085 : :
2086 : : out_put_key:
2087 : : put_futex_key(&q.key);
2088 : : out:
2089 : : if (to)
2090 : : destroy_hrtimer_on_stack(&to->timer);
2091 [ # # ]: 0 : return ret != -EINTR ? ret : -ERESTARTNOINTR;
2092 : :
2093 : : uaddr_faulted:
2094 : : queue_unlock(&q, hb);
2095 : :
2096 : 0 : ret = fault_in_user_writeable(uaddr);
2097 [ # # ]: 0 : if (ret)
2098 : : goto out_put_key;
2099 : :
2100 [ # # ]: 0 : if (!(flags & FLAGS_SHARED))
2101 : : goto retry_private;
2102 : :
2103 : : put_futex_key(&q.key);
2104 : : goto retry;
2105 : : }
2106 : :
2107 : : /*
2108 : : * Userspace attempted a TID -> 0 atomic transition, and failed.
2109 : : * This is the in-kernel slowpath: we look up the PI state (if any),
2110 : : * and do the rt-mutex unlock.
2111 : : */
2112 : 0 : static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
2113 : : {
2114 : : struct futex_hash_bucket *hb;
2115 : 0 : struct futex_q *this, *next;
2116 : : struct plist_head *head;
2117 : 0 : union futex_key key = FUTEX_KEY_INIT;
2118 : 0 : u32 uval, vpid = task_pid_vnr(current);
2119 : : int ret;
2120 : :
2121 : : retry:
2122 [ # # ]: 0 : if (get_user(uval, uaddr))
2123 : : return -EFAULT;
2124 : : /*
2125 : : * We release only a lock we actually own:
2126 : : */
2127 [ # # ]: 0 : if ((uval & FUTEX_TID_MASK) != vpid)
2128 : : return -EPERM;
2129 : :
2130 : 0 : ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
2131 [ # # ]: 0 : if (unlikely(ret != 0))
2132 : : goto out;
2133 : :
2134 : 0 : hb = hash_futex(&key);
2135 : : spin_lock(&hb->lock);
2136 : :
2137 : : /*
2138 : : * To avoid races, try to do the TID -> 0 atomic transition
2139 : : * again. If it succeeds then we can return without waking
2140 : : * anyone else up:
2141 : : */
2142 [ # # # # ]: 0 : if (!(uval & FUTEX_OWNER_DIED) &&
2143 : 0 : cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
2144 : : goto pi_faulted;
2145 : : /*
2146 : : * Rare case: we managed to release the lock atomically,
2147 : : * no need to wake anyone else up:
2148 : : */
2149 [ # # ]: 0 : if (unlikely(uval == vpid))
2150 : : goto out_unlock;
2151 : :
2152 : : /*
2153 : : * Ok, other tasks may need to be woken up - check waiters
2154 : : * and do the wakeup if necessary:
2155 : : */
2156 : : head = &hb->chain;
2157 : :
2158 [ # # ]: 0 : plist_for_each_entry_safe(this, next, head, list) {
2159 [ # # ]: 0 : if (!match_futex (&this->key, &key))
2160 : 0 : continue;
2161 : 0 : ret = wake_futex_pi(uaddr, uval, this);
2162 : : /*
2163 : : * The atomic access to the futex value
2164 : : * generated a pagefault, so retry the
2165 : : * user-access and the wakeup:
2166 : : */
2167 [ # # ]: 0 : if (ret == -EFAULT)
2168 : : goto pi_faulted;
2169 : : goto out_unlock;
2170 : : }
2171 : : /*
2172 : : * No waiters - kernel unlocks the futex:
2173 : : */
2174 [ # # ]: 0 : if (!(uval & FUTEX_OWNER_DIED)) {
2175 : 0 : ret = unlock_futex_pi(uaddr, uval);
2176 [ # # ]: 0 : if (ret == -EFAULT)
2177 : : goto pi_faulted;
2178 : : }
2179 : :
2180 : : out_unlock:
2181 : : spin_unlock(&hb->lock);
2182 : : put_futex_key(&key);
2183 : :
2184 : : out:
2185 : 0 : return ret;
2186 : :
2187 : : pi_faulted:
2188 : : spin_unlock(&hb->lock);
2189 : : put_futex_key(&key);
2190 : :
2191 : 0 : ret = fault_in_user_writeable(uaddr);
2192 [ # # ]: 0 : if (!ret)
2193 : : goto retry;
2194 : :
2195 : : return ret;
2196 : : }
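Both slow paths above back up a user-space fast path: an uncontended lock is a single 0 -> TID cmpxchg of the futex word and an uncontended unlock is TID -> 0; FUTEX_LOCK_PI/FUTEX_UNLOCK_PI are entered only when those transitions fail. A rough, illustrative sketch (helper names are assumptions, error handling omitted):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdint.h>
    #include <time.h>

    static long futex(uint32_t *uaddr, int op, uint32_t val,
                      const struct timespec *timeout)
    {
            return syscall(SYS_futex, uaddr, op, val, timeout, NULL, 0);
    }

    static void pi_lock(uint32_t *lock)
    {
            uint32_t zero = 0;
            uint32_t tid = (uint32_t)syscall(SYS_gettid);

            /* Fast path: 0 -> TID means we own the lock without entering the kernel. */
            if (__atomic_compare_exchange_n(lock, &zero, tid, 0,
                                            __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
                    return;
            /* Contended: the kernel blocks us and handles the PI boosting. */
            futex(lock, FUTEX_LOCK_PI, 0, NULL);
    }

    static void pi_unlock(uint32_t *lock)
    {
            uint32_t tid = (uint32_t)syscall(SYS_gettid);

            /* Fast path: TID -> 0 only succeeds if no waiter bits are set. */
            if (__atomic_compare_exchange_n(lock, &tid, 0, 0,
                                            __ATOMIC_RELEASE, __ATOMIC_RELAXED))
                    return;
            /* Waiters present (or owner-died handling needed): kernel unlocks. */
            futex(lock, FUTEX_UNLOCK_PI, 0, NULL);
    }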
2197 : :
2198 : : /**
2199 : : * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
2200 : : * @hb: the hash_bucket futex_q was originally enqueued on
2201 : : * @q: the futex_q woken while waiting to be requeued
2202 : : * @key2: the futex_key of the requeue target futex
2203 : : * @timeout: the timeout associated with the wait (NULL if none)
2204 : : *
2205 : : * Detect if the task was woken on the initial futex as opposed to the requeue
2206 : : * target futex. If so, determine if it was a timeout or a signal that caused
2207 : : * the wakeup and return the appropriate error code to the caller. Must be
2208 : : * called with the hb lock held.
2209 : : *
2210 : : * Return:
2211 : : * 0 = no early wakeup detected;
2212 : : * <0 = -ETIMEDOUT or -ERESTARTNOINTR
2213 : : */
2214 : : static inline
2215 : : int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2216 : : struct futex_q *q, union futex_key *key2,
2217 : : struct hrtimer_sleeper *timeout)
2218 : : {
2219 : : int ret = 0;
2220 : :
2221 : : /*
2222 : : * With the hb lock held, we avoid races while we process the wakeup.
2223 : : * We only need to hold hb (and not hb2) to ensure atomicity as the
2224 : : * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
2225 : : * It can't be requeued from uaddr2 to something else since we don't
2226 : : * support a PI aware source futex for requeue.
2227 : : */
2228 [ # # ]: 0 : if (!match_futex(&q->key, key2)) {
2229 [ # # ][ # # ]: 0 : WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
[ # # ]
2230 : : /*
2231 : : * We were woken prior to requeue by a timeout or a signal.
2232 : : * Unqueue the futex_q and determine which it was.
2233 : : */
2234 : 0 : plist_del(&q->list, &hb->chain);
2235 : :
2236 : : /* Handle spurious wakeups gracefully */
2237 : : ret = -EWOULDBLOCK;
2238 [ # # ][ # # ]: 0 : if (timeout && !timeout->task)
2239 : : ret = -ETIMEDOUT;
2240 [ # # ]: 0 : else if (signal_pending(current))
2241 : : ret = -ERESTARTNOINTR;
2242 : : }
2243 : : return ret;
2244 : : }
2245 : :
2246 : : /**
2247 : : * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
2248 : : * @uaddr: the futex we initially wait on (non-pi)
2249 : : * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
2250 : : * the same type, no requeueing from private to shared, etc.
2251 : : * @val: the expected value of uaddr
2252 : : * @abs_time: absolute timeout
2253 : : * @bitset: 32 bit wakeup bitset set by userspace, defaults to all
2254 : : * @uaddr2: the pi futex we will take prior to returning to user-space
2255 : : *
2256 : : * The caller will wait on uaddr and will be requeued by futex_requeue() to
2257 : : * uaddr2, which must be PI aware and distinct from uaddr. Normal wakeup will wake
2258 : : * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
2259 : : * userspace. This ensures the rt_mutex maintains an owner when it has waiters;
2260 : : * without one, the pi logic would not know which task to boost/deboost, if
2261 : : * there was a need to.
2262 : : *
2263 : : * We call schedule() in futex_wait_queue_me() when we enqueue and return there
2264 : : * via one of the following:
2265 : : * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
2266 : : * 2) wakeup on uaddr2 after a requeue
2267 : : * 3) signal
2268 : : * 4) timeout
2269 : : *
2270 : : * If 3, cleanup and return -ERESTARTNOINTR.
2271 : : *
2272 : : * If 2, we may then block on trying to take the rt_mutex and return via:
2273 : : * 5) successful lock
2274 : : * 6) signal
2275 : : * 7) timeout
2276 : : * 8) other lock acquisition failure
2277 : : *
2278 : : * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
2279 : : *
2280 : : * If 4 or 7, we cleanup and return with -ETIMEDOUT.
2281 : : *
2282 : : * Return:
2283 : : * 0 - On success;
2284 : : * <0 - On error
2285 : : */
2286 : 0 : static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2287 : : u32 val, ktime_t *abs_time, u32 bitset,
2288 : : u32 __user *uaddr2)
2289 : : {
2290 : : struct hrtimer_sleeper timeout, *to = NULL;
2291 : : struct rt_mutex_waiter rt_waiter;
2292 : 0 : struct rt_mutex *pi_mutex = NULL;
2293 : : struct futex_hash_bucket *hb;
2294 : 0 : union futex_key key2 = FUTEX_KEY_INIT;
2295 : 0 : struct futex_q q = futex_q_init;
2296 : : int res, ret;
2297 : :
2298 [ # # ]: 0 : if (uaddr == uaddr2)
2299 : : return -EINVAL;
2300 : :
2301 [ # # ]: 0 : if (!bitset)
2302 : : return -EINVAL;
2303 : :
2304 [ # # ]: 0 : if (abs_time) {
2305 : : to = &timeout;
2306 : 0 : hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2307 : : CLOCK_REALTIME : CLOCK_MONOTONIC,
2308 : : HRTIMER_MODE_ABS);
2309 : 0 : hrtimer_init_sleeper(to, current);
2310 : 0 : hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2311 : 0 : current->timer_slack_ns);
2312 : : }
2313 : :
2314 : : /*
2315 : : * The waiter is allocated on our stack, manipulated by the requeue
2316 : : * code while we sleep on uaddr.
2317 : : */
2318 : : debug_rt_mutex_init_waiter(&rt_waiter);
2319 : 0 : rt_waiter.task = NULL;
2320 : :
2321 : 0 : ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
2322 [ # # ]: 0 : if (unlikely(ret != 0))
2323 : : goto out;
2324 : :
2325 : 0 : q.bitset = bitset;
2326 : 0 : q.rt_waiter = &rt_waiter;
2327 : 0 : q.requeue_pi_key = &key2;
2328 : :
2329 : : /*
2330 : : * Prepare to wait on uaddr. On success, increments q.key (key1) ref
2331 : : * count.
2332 : : */
2333 : 0 : ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
2334 [ # # ]: 0 : if (ret)
2335 : : goto out_key2;
2336 : :
2337 : : /* Queue the futex_q, drop the hb lock, wait for wakeup. */
2338 : 0 : futex_wait_queue_me(hb, &q, to);
2339 : :
2340 : 0 : spin_lock(&hb->lock);
2341 : 0 : ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
2342 : 0 : spin_unlock(&hb->lock);
2343 [ # # ]: 0 : if (ret)
2344 : : goto out_put_keys;
2345 : :
2346 : : /*
2347 : : * In order for us to be here, we know our q.key == key2, and since
2348 : : * we took the hb->lock above, we also know that futex_requeue() has
2349 : : * completed and we no longer have to concern ourselves with a wakeup
2350 : : * race with the atomic proxy lock acquisition by the requeue code. The
2351 : : * futex_requeue dropped our key1 reference and incremented our key2
2352 : : * reference count.
2353 : : */
2354 : :
2355 : : /* Check if the requeue code acquired the second futex for us. */
2356 [ # # ]: 0 : if (!q.rt_waiter) {
2357 : : /*
2358 : : * Got the lock. We might not be the anticipated owner if we
2359 : : * did a lock-steal - fix up the PI-state in that case.
2360 : : */
2361 [ # # ][ # # ]: 0 : if (q.pi_state && (q.pi_state->owner != current)) {
2362 : 0 : spin_lock(q.lock_ptr);
2363 : 0 : ret = fixup_pi_state_owner(uaddr2, &q, current);
2364 : 0 : spin_unlock(q.lock_ptr);
2365 : : }
2366 : : } else {
2367 : : /*
2368 : : * We have been woken up by futex_unlock_pi(), a timeout, or a
2369 : : * signal. futex_unlock_pi() will not destroy the lock_ptr nor
2370 : : * the pi_state.
2371 : : */
2372 [ # # ]: 0 : WARN_ON(!q.pi_state);
2373 : 0 : pi_mutex = &q.pi_state->pi_mutex;
2374 : 0 : ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
2375 : : debug_rt_mutex_free_waiter(&rt_waiter);
2376 : :
2377 : 0 : spin_lock(q.lock_ptr);
2378 : : /*
2379 : : * Fixup the pi_state owner and possibly acquire the lock if we
2380 : : * haven't already.
2381 : : */
2382 : 0 : res = fixup_owner(uaddr2, &q, !ret);
2383 : : /*
2384 : : * If fixup_owner() returned an error, propagate that. If it
2385 : : * acquired the lock, clear -ETIMEDOUT or -EINTR.
2386 : : */
2387 [ # # ]: 0 : if (res)
2388 : 0 : ret = (res < 0) ? res : 0;
2389 : :
2390 : : /* Unqueue and drop the lock. */
2391 : 0 : unqueue_me_pi(&q);
2392 : : }
2393 : :
2394 : : /*
2395 : : * If fixup_pi_state_owner() faulted and was unable to handle the
2396 : : * fault, unlock the rt_mutex and return the fault to userspace.
2397 : : */
2398 [ # # ]: 0 : if (ret == -EFAULT) {
2399 [ # # ][ # # ]: 0 : if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
2400 : 0 : rt_mutex_unlock(pi_mutex);
2401 [ # # ]: 0 : } else if (ret == -EINTR) {
2402 : : /*
2403 : : * We've already been requeued, but cannot restart by calling
2404 : : * futex_lock_pi() directly. We could restart this syscall, but
2405 : : * it would detect that the user space "val" changed and return
2406 : : * -EWOULDBLOCK. Save the overhead of the restart and return
2407 : : * -EWOULDBLOCK directly.
2408 : : */
2409 : : ret = -EWOULDBLOCK;
2410 : : }
2411 : :
2412 : : out_put_keys:
2413 : : put_futex_key(&q.key);
2414 : : out_key2:
2415 : : put_futex_key(&key2);
2416 : :
2417 : : out:
2418 [ # # ]: 0 : if (to) {
2419 : 0 : hrtimer_cancel(&to->timer);
2420 : : destroy_hrtimer_on_stack(&to->timer);
2421 : : }
2422 : 0 : return ret;
2423 : : }
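The wait/requeue pairing documented above is what a PI-aware condition variable is built from in user space: the waiter blocks on the condvar word with FUTEX_WAIT_REQUEUE_PI, and the signaller moves it onto the PI mutex with FUTEX_CMP_REQUEUE_PI. A very rough, untested sketch with illustrative names (the mutex release before waiting and all error handling are omitted):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdint.h>
    #include <time.h>

    static long futex(uint32_t *uaddr, int op, uint32_t val,
                      const struct timespec *timeout, uint32_t *uaddr2, uint32_t val3)
    {
            return syscall(SYS_futex, uaddr, op, val, timeout, uaddr2, val3);
    }

    /* cond: plain futex word the waiter sleeps on; mutex: PI futex word (0/TID). */
    static void cond_wait(uint32_t *cond, uint32_t *mutex)
    {
            uint32_t seq = __atomic_load_n(cond, __ATOMIC_ACQUIRE);

            /* Caller holds *mutex and must release it before this call (not shown).
             * On a 0 return the kernel has already acquired *mutex for us. */
            futex(cond, FUTEX_WAIT_REQUEUE_PI, seq, NULL, mutex, 0);
    }

    static void cond_signal(uint32_t *cond, uint32_t *mutex)
    {
            uint32_t val = __atomic_add_fetch(cond, 1, __ATOMIC_RELEASE);

            /* Wake/requeue one waiter onto the PI mutex. The timeout slot carries
             * the requeue count (0 here) and val3 is the expected *cond value. */
            futex(cond, FUTEX_CMP_REQUEUE_PI, 1, NULL, mutex, val);
    }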
2424 : :
2425 : : /*
2426 : : * Support for robust futexes: the kernel cleans up held futexes at
2427 : : * thread exit time.
2428 : : *
2429 : : * Implementation: user-space maintains a per-thread list of locks it
2430 : : * is holding. Upon do_exit(), the kernel carefully walks this list,
2431 : : * and marks all locks that are owned by this thread with the
2432 : : * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
2433 : : * always manipulated with the lock held, so the list is private and
2434 : : * per-thread. Userspace also maintains a per-thread 'list_op_pending'
2435 : : * field, to allow the kernel to clean up if the thread dies after
2436 : : * acquiring the lock, but just before it could have added itself to
2437 : : * the list. There can only be one such pending lock.
2438 : : */
2439 : :
2440 : : /**
2441 : : * sys_set_robust_list() - Set the robust-futex list head of a task
2442 : : * @head: pointer to the list-head
2443 : : * @len: length of the list-head, as userspace expects
2444 : : */
2445 : 0 : SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
2446 : : size_t, len)
2447 : : {
2448 [ + ]: 14208 : if (!futex_cmpxchg_enabled)
2449 : : return -ENOSYS;
2450 : : /*
2451 : : * The kernel knows only one size for now:
2452 : : */
2453 [ + ]: 14209 : if (unlikely(len != sizeof(*head)))
2454 : : return -EINVAL;
2455 : :
2456 : 14210 : current->robust_list = head;
2457 : :
2458 : : return 0;
2459 : : }
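On the user-space side of the scheme described before sys_set_robust_list(), each thread registers one robust_list_head once and afterwards links and unlinks its lock records itself. A minimal illustrative sketch (struct robust_mutex and the helper are assumptions, not glibc's actual layout):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stddef.h>

    /* One record per held robust mutex; the futex word sits at a fixed
     * offset from the list entry, recorded once in futex_offset. */
    struct robust_mutex {
            struct robust_list list;     /* linkage walked by exit_robust_list() */
            unsigned int futex;          /* the futex word itself */
    };

    static __thread struct robust_list_head robust_head;

    static void register_robust_list(void)
    {
            robust_head.list.next = &robust_head.list;      /* empty, circular */
            robust_head.futex_offset = offsetof(struct robust_mutex, futex)
                                     - offsetof(struct robust_mutex, list);
            robust_head.list_op_pending = NULL;
            syscall(SYS_set_robust_list, &robust_head, sizeof(robust_head));
    }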
2460 : :
2461 : : /**
2462 : : * sys_get_robust_list() - Get the robust-futex list head of a task
2463 : : * @pid: pid of the process [zero for current task]
2464 : : * @head_ptr: pointer to a list-head pointer, the kernel fills it in
2465 : : * @len_ptr: pointer to a length field, the kernel fills in the header size
2466 : : */
2467 : 0 : SYSCALL_DEFINE3(get_robust_list, int, pid,
2468 : : struct robust_list_head __user * __user *, head_ptr,
2469 : : size_t __user *, len_ptr)
2470 : : {
2471 : : struct robust_list_head __user *head;
2472 : : unsigned long ret;
2473 : : struct task_struct *p;
2474 : :
2475 [ + - ]: 5 : if (!futex_cmpxchg_enabled)
2476 : : return -ENOSYS;
2477 : :
2478 : : rcu_read_lock();
2479 : :
2480 : : ret = -ESRCH;
2481 [ + + ]: 5 : if (!pid)
2482 : 3 : p = current;
2483 : : else {
2484 : 2 : p = find_task_by_vpid(pid);
2485 [ + + ]: 2 : if (!p)
2486 : : goto err_unlock;
2487 : : }
2488 : :
2489 : : ret = -EPERM;
2490 [ + + ]: 4 : if (!ptrace_may_access(p, PTRACE_MODE_READ))
2491 : : goto err_unlock;
2492 : :
2493 : 3 : head = p->robust_list;
2494 : : rcu_read_unlock();
2495 : :
2496 [ + + ]: 3 : if (put_user(sizeof(*head), len_ptr))
2497 : : return -EFAULT;
2498 : 2 : return put_user(head, head_ptr);
2499 : :
2500 : : err_unlock:
2501 : : rcu_read_unlock();
2502 : :
2503 : 2 : return ret;
2504 : : }
2505 : :
2506 : : /*
2507 : : * Process a futex-list entry, check whether it's owned by the
2508 : : * dying task, and do notification if so:
2509 : : */
2510 : 1 : int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
2511 : : {
2512 : : u32 uval, uninitialized_var(nval), mval;
2513 : :
2514 : : retry:
2515 [ - + ]: 1 : if (get_user(uval, uaddr))
2516 : : return -1;
2517 : :
2518 [ # # ]: 0 : if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
2519 : : /*
2520 : : * Ok, this dying thread is truly holding a futex
2521 : : * of interest. Set the OWNER_DIED bit atomically
2522 : : * via cmpxchg, and if the value had FUTEX_WAITERS
2523 : : * set, wake up a waiter (if any). (We have to do a
2524 : : * futex_wake() even if OWNER_DIED is already set -
2525 : : * to handle the rare but possible case of recursive
2526 : : * thread-death.) The rest of the cleanup is done in
2527 : : * userspace.
2528 : : */
2529 : 0 : mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
2530 : : /*
2531 : : * We are not holding a lock here, but we want to have
2532 : : * the pagefault_disable/enable() protection because
2533 : : * we want to handle the fault gracefully. If the
2534 : : * access fails we try to fault in the futex with R/W
2535 : : * verification via get_user_pages. get_user() above
2536 : : * does not guarantee R/W access. If that fails we
2537 : : * give up and leave the futex locked.
2538 : : */
2539 [ # # ]: 0 : if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
2540 [ # # ]: 0 : if (fault_in_user_writeable(uaddr))
2541 : : return -1;
2542 : : goto retry;
2543 : : }
2544 [ # # ]: 0 : if (nval != uval)
2545 : : goto retry;
2546 : :
2547 : : /*
2548 : : * Wake robust non-PI futexes here. The wakeup of
2549 : : * PI futexes happens in exit_pi_state():
2550 : : */
2551 [ # # ]: 0 : if (!pi && (uval & FUTEX_WAITERS))
2552 : 0 : futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
2553 : : }
2554 : : return 0;
2555 : : }
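From user space, the FUTEX_OWNER_DIED bit written by handle_futex_death() is what the next locker of a robust non-PI futex checks for: it may take the lock over from the dead owner and report that the protected state needs recovery. A hedged, illustrative sketch (the function name and return convention are assumptions):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdint.h>

    /* Returns 1 on a clean acquisition, 2 if acquired from a dead owner
     * (recovery needed), 0 if a live owner still holds the lock. */
    static int robust_trylock(uint32_t *lock)
    {
            uint32_t tid = (uint32_t)syscall(SYS_gettid);
            uint32_t old = __atomic_load_n(lock, __ATOMIC_RELAXED);

            for (;;) {
                    if ((old & FUTEX_TID_MASK) == 0 || (old & FUTEX_OWNER_DIED)) {
                            uint32_t new = tid | (old & FUTEX_WAITERS);

                            if (__atomic_compare_exchange_n(lock, &old, new, 0,
                                                            __ATOMIC_ACQUIRE,
                                                            __ATOMIC_RELAXED))
                                    return (old & FUTEX_OWNER_DIED) ? 2 : 1;
                            continue;    /* old was reloaded, re-evaluate */
                    }
                    return 0;            /* live owner still holds it */
            }
    }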
2556 : :
2557 : : /*
2558 : : * Fetch a robust-list pointer. Bit 0 signals PI futexes:
2559 : : */
2560 : : static inline int fetch_robust_entry(struct robust_list __user **entry,
2561 : : struct robust_list __user * __user *head,
2562 : : unsigned int *pi)
2563 : : {
2564 : : unsigned long uentry;
2565 : :
2566 [ + + ][ + + ]: 28419 : if (get_user(uentry, (unsigned long __user *)head))
[ + - ]
2567 : : return -EFAULT;
2568 : :
2569 : 28418 : *entry = (void __user *)(uentry & ~1UL);
2570 : 28418 : *pi = uentry & 1;
2571 : :
2572 : : return 0;
2573 : : }
2574 : :
2575 : : /*
2576 : : * Walk curr->robust_list (very carefully, it's a userspace list!)
2577 : : * and mark any locks found there dead, and notify any waiters.
2578 : : *
2579 : : * We silently return on any sign of a list-walking problem.
2580 : : */
2581 : 0 : void exit_robust_list(struct task_struct *curr)
2582 : : {
2583 : 14210 : struct robust_list_head __user *head = curr->robust_list;
2584 : : struct robust_list __user *entry, *next_entry, *pending;
2585 : : unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
2586 : : unsigned int uninitialized_var(next_pi);
2587 : : unsigned long futex_offset;
2588 : : int rc;
2589 : :
2590 [ + + ]: 14210 : if (!futex_cmpxchg_enabled)
2591 : : return;
2592 : :
2593 : : /*
2594 : : * Fetch the list head (which was registered earlier, via
2595 : : * sys_set_robust_list()):
2596 : : */
2597 [ + + ]: 14210 : if (fetch_robust_entry(&entry, &head->list.next, &pi))
2598 : : return;
2599 : : /*
2600 : : * Fetch the relative futex offset:
2601 : : */
2602 [ + - ]: 14209 : if (get_user(futex_offset, &head->futex_offset))
2603 : : return;
2604 : : /*
2605 : : * Fetch any possibly pending lock-add first, and handle it
2606 : : * if it exists:
2607 : : */
2608 [ + + ]: 14209 : if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
2609 : : return;
2610 : :
2611 : : next_entry = NULL; /* avoid warning with gcc */
2612 [ + + ]: 14208 : while (entry != &head->list) {
2613 : : /*
2614 : : * Fetch the next entry in the list before calling
2615 : : * handle_futex_death:
2616 : : */
2617 : 1 : rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
2618 : : /*
2619 : : * A pending lock might already be on the list, so
2620 : : * don't process it twice:
2621 : : */
2622 [ + - ]: 1 : if (entry != pending)
2623 [ - + ]: 1 : if (handle_futex_death((void __user *)entry + futex_offset,
2624 : : curr, pi))
2625 : : return;
2626 [ # # ]: 0 : if (rc)
2627 : : return;
2628 : : entry = next_entry;
2629 : : pi = next_pi;
2630 : : /*
2631 : : * Avoid excessively long or circular lists:
2632 : : */
2633 [ # # ]: 0 : if (!--limit)
2634 : : break;
2635 : :
2636 : 0 : cond_resched();
2637 : : }
2638 : :
2639 [ - + ]: 14207 : if (pending)
2640 : 0 : handle_futex_death((void __user *)pending + futex_offset,
2641 : : curr, pip);
2642 : : }
2643 : :
2644 : 0 : long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2645 : : u32 __user *uaddr2, u32 val2, u32 val3)
2646 : : {
2647 : 11118755 : int cmd = op & FUTEX_CMD_MASK;
2648 : : unsigned int flags = 0;
2649 : :
2650 [ + + ]: 11118755 : if (!(op & FUTEX_PRIVATE_FLAG))
2651 : : flags |= FLAGS_SHARED;
2652 : :
2653 [ + + ]: 11118755 : if (op & FUTEX_CLOCK_REALTIME) {
2654 : 406174 : flags |= FLAGS_CLOCKRT;
2655 [ + ][ + + ]: 11118755 : if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
[ + - ]
2656 : : return -ENOSYS;
2657 : : }
2658 : :
2659 : : switch (cmd) {
2660 : : case FUTEX_LOCK_PI:
2661 : : case FUTEX_UNLOCK_PI:
2662 : : case FUTEX_TRYLOCK_PI:
2663 : : case FUTEX_WAIT_REQUEUE_PI:
2664 : : case FUTEX_CMP_REQUEUE_PI:
2665 [ # # ]: 0 : if (!futex_cmpxchg_enabled)
2666 : : return -ENOSYS;
2667 : : }
2668 : :
2669 [ + + + - : 11416779 : switch (cmd) {
- + + - -
- - - - ]
2670 : : case FUTEX_WAIT:
2671 : 3163779 : val3 = FUTEX_BITSET_MATCH_ANY;
2672 : : case FUTEX_WAIT_BITSET:
2673 : 3569953 : return futex_wait(uaddr, flags, val, timeout, val3);
2674 : : case FUTEX_WAKE:
2675 : 7338761 : val3 = FUTEX_BITSET_MATCH_ANY;
2676 : : case FUTEX_WAKE_BITSET:
2677 : 7338761 : return futex_wake(uaddr, flags, val, val3);
2678 : : case FUTEX_REQUEUE:
2679 : 0 : return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
2680 : : case FUTEX_CMP_REQUEUE:
2681 : 173 : return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
2682 : : case FUTEX_WAKE_OP:
2683 : 507892 : return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
2684 : : case FUTEX_LOCK_PI:
2685 : 0 : return futex_lock_pi(uaddr, flags, val, timeout, 0);
2686 : : case FUTEX_UNLOCK_PI:
2687 : 0 : return futex_unlock_pi(uaddr, flags);
2688 : : case FUTEX_TRYLOCK_PI:
2689 : 0 : return futex_lock_pi(uaddr, flags, 0, timeout, 1);
2690 : : case FUTEX_WAIT_REQUEUE_PI:
2691 : 0 : val3 = FUTEX_BITSET_MATCH_ANY;
2692 : 0 : return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
2693 : : uaddr2);
2694 : : case FUTEX_CMP_REQUEUE_PI:
2695 : 0 : return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
2696 : : }
2697 : : return -ENOSYS;
2698 : : }
2699 : :
2700 : :
2701 : 0 : SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
2702 : : struct timespec __user *, utime, u32 __user *, uaddr2,
2703 : : u32, val3)
2704 : : {
2705 : : struct timespec ts;
2706 : : ktime_t t, *tp = NULL;
2707 : : u32 val2 = 0;
2708 : 11270724 : int cmd = op & FUTEX_CMD_MASK;
2709 : :
2710 [ + + ][ + - ]: 11270724 : if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
2711 [ + + ]: 914243 : cmd == FUTEX_WAIT_BITSET ||
2712 : 914243 : cmd == FUTEX_WAIT_REQUEUE_PI)) {
2713 [ + - ]: 406174 : if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
2714 : : return -EFAULT;
2715 [ + - ]: 406174 : if (!timespec_valid(&ts))
2716 : : return -EINVAL;
2717 : :
2718 : 406174 : t = timespec_to_ktime(ts);
2719 [ - + ]: 406174 : if (cmd == FUTEX_WAIT)
2720 : 0 : t = ktime_add_safe(ktime_get(), t);
2721 : : tp = &t;
2722 : : }
2723 : : /*
2724 : : * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
2725 : : * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
2726 : : */
2727 [ + + ]: 11206788 : if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
2728 [ + + ]: 11198990 : cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
2729 : 508065 : val2 = (u32) (unsigned long) utime;
2730 : :
2731 : 11206788 : return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
2732 : : }
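The timespec handling above gives the two wait flavours different timeout semantics: FUTEX_WAIT receives a relative interval, which the kernel turns into an absolute deadline with ktime_add_safe(), while FUTEX_WAIT_BITSET receives an absolute time on CLOCK_MONOTONIC (or CLOCK_REALTIME when FUTEX_CLOCK_REALTIME is set). An illustrative user-space sketch of both conventions (the wrapper name is an assumption):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdint.h>
    #include <time.h>

    static long futex(uint32_t *uaddr, int op, uint32_t val,
                      const struct timespec *timeout, uint32_t val3)
    {
            return syscall(SYS_futex, uaddr, op, val, timeout, NULL, val3);
    }

    static void wait_examples(uint32_t *word, uint32_t expected)
    {
            /* Relative: block for at most 100ms. */
            struct timespec rel = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 };
            futex(word, FUTEX_WAIT_PRIVATE, expected, &rel, 0);

            /* Absolute: block until a CLOCK_MONOTONIC deadline. */
            struct timespec deadline;
            clock_gettime(CLOCK_MONOTONIC, &deadline);
            deadline.tv_sec += 1;
            futex(word, FUTEX_WAIT_BITSET_PRIVATE, expected, &deadline,
                  FUTEX_BITSET_MATCH_ANY);
    }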
2733 : :
2734 : 0 : static int __init futex_init(void)
2735 : : {
2736 : : u32 curval;
2737 : : int i;
2738 : :
2739 : : /*
2740 : : * This will fail and we want it. Some arch implementations do
2741 : : * runtime detection of the futex_atomic_cmpxchg_inatomic()
2742 : : * functionality. We want to know that before we call in any
2743 : : * of the complex code paths. Also we want to prevent
2744 : : * registration of robust lists in that case. NULL is
2745 : : * guaranteed to fault and we get -EFAULT on functional
2746 : : * implementation, the non-functional ones will return
2747 : : * -ENOSYS.
2748 : : */
2749 [ # # ]: 0 : if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2750 : 0 : futex_cmpxchg_enabled = 1;
2751 : :
2752 [ # # ]: 0 : for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
2753 : 0 : plist_head_init(&futex_queues[i].chain);
2754 : 0 : spin_lock_init(&futex_queues[i].lock);
2755 : : }
2756 : :
2757 : 0 : return 0;
2758 : : }
2759 : : __initcall(futex_init);