/* * Fast Userspace Mutexes (which I call "Futexes!"). * (C) Rusty Russell, IBM 2002 * * Generalized futexes, futex requeueing, misc fixes by Ingo Molnar * (C) Copyright 2003 Red Hat Inc, All Rights Reserved * * Removed page pinning, fix privately mapped COW pages and other cleanups * (C) Copyright 2003, 2004 Jamie Lokier * * Robust futex support started by Ingo Molnar * (C) Copyright 2006 Red Hat Inc, All Rights Reserved * Thanks to Thomas Gleixner for suggestions, analysis and fixes. * * PI-futex support started by Ingo Molnar and Thomas Gleixner * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar * Copyright (C) 2006 Timesys Corp., Thomas Gleixner * * PRIVATE futexes by Eric Dumazet * Copyright (C) 2007 Eric Dumazet * * Requeue-PI support by Darren Hart * Copyright (C) IBM Corporation, 2009 * Thanks to Thomas Gleixner for conceptual design and careful reviews. * * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly * enough at me, Linus for the original (flawed) idea, Matthew * Kirkwood for proof-of-concept implementation. * * "The futexes are also cursed." * "But they come in a choice of three flavours!" * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "rtmutex_common.h" int __read_mostly futex_cmpxchg_enabled; #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) #define FLAGS_SHARED 0x01 #define FLAGS_CLOCKRT 0x02 #define FLAGS_HAS_TIMEOUT 0x04 struct futex_pi_state { struct list_head list; struct rt_mutex pi_mutex; struct task_struct *owner; atomic_t refcount; union futex_key key; }; struct futex_q { struct plist_node list; struct task_struct *task; spinlock_t *lock_ptr; union futex_key key; struct futex_pi_state *pi_state; struct rt_mutex_waiter *rt_waiter; union futex_key *requeue_pi_key; u32 bitset; }; static const struct futex_q futex_q_init = { .key = FUTEX_KEY_INIT, .bitset = FUTEX_BITSET_MATCH_ANY }; struct futex_hash_bucket { spinlock_t lock; struct plist_head chain; }; static struct futex_hash_bucket futex_queues[1<both.word, (sizeof(key->both.word)+sizeof(key->both.ptr))/4, key->both.offset); return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)]; } static inline int match_futex(union futex_key *key1, union futex_key *key2) { return (key1 && key2 && key1->both.word == key2->both.word && key1->both.ptr == key2->both.ptr && key1->both.offset == key2->both.offset); } static void get_futex_key_refs(union futex_key *key) { if (!key->both.ptr) return; switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: ihold(key->shared.inode); break; case FUT_OFF_MMSHARED: atomic_inc(&key->private.mm->mm_count); break; } } static void drop_futex_key_refs(union futex_key *key) { if (!key->both.ptr) { WARN_ON_ONCE(1); return; } switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: iput(key->shared.inode); break; case FUT_OFF_MMSHARED: mmdrop(key->private.mm); break; } } static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; struct page *page, *page_head; int err, ro = 0; key->both.offset = address % PAGE_SIZE; if (unlikely((address % sizeof(u32)) != 0)) return -EINVAL; address -= key->both.offset; if (!fshared) { if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))) return -EFAULT; key->private.mm = mm; key->private.address = address; get_futex_key_refs(key); return 0; } again: err = get_user_pages_fast(address, 1, 1, &page); if (err == -EFAULT && rw == VERIFY_READ) { err = get_user_pages_fast(address, 1, 0, &page); ro = 1; } if (err < 0) return err; else err = 0; #ifdef CONFIG_TRANSPARENT_HUGEPAGE page_head = page; if (unlikely(PageTail(page))) { put_page(page); local_irq_disable(); if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) { page_head = compound_head(page); if (page != page_head) { get_page(page_head); put_page(page); } local_irq_enable(); } else { local_irq_enable(); goto again; } } #else page_head = compound_head(page); if (page != page_head) { get_page(page_head); put_page(page); } #endif lock_page(page_head); if (!page_head->mapping) { int shmem_swizzled = PageSwapCache(page_head); unlock_page(page_head); put_page(page_head); if (shmem_swizzled) goto again; return -EFAULT; } if (PageAnon(page_head)) { if (ro) { err = -EFAULT; goto out; } key->both.offset |= FUT_OFF_MMSHARED; key->private.mm = mm; key->private.address = address; } else { key->both.offset |= FUT_OFF_INODE; key->shared.inode = page_head->mapping->host; key->shared.pgoff = page_head->index; } get_futex_key_refs(key); out: unlock_page(page_head); put_page(page_head); return err; } static inline void put_futex_key(union futex_key *key) { drop_futex_key_refs(key); } static int fault_in_user_writeable(u32 __user *uaddr) { struct mm_struct *mm = current->mm; int ret; down_read(&mm->mmap_sem); ret = fixup_user_fault(current, mm, (unsigned long)uaddr, FAULT_FLAG_WRITE); up_read(&mm->mmap_sem); return ret < 0 ? ret : 0; } static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key) { struct futex_q *this; plist_for_each_entry(this, &hb->chain, list) { if (match_futex(&this->key, key)) return this; } return NULL; } static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval) { int ret; pagefault_disable(); ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval); pagefault_enable(); return ret; } static int get_futex_value_locked(u32 *dest, u32 __user *from) { int ret; pagefault_disable(); ret = __copy_from_user_inatomic(dest, from, sizeof(u32)); pagefault_enable(); return ret ? -EFAULT : 0; } static int refill_pi_state_cache(void) { struct futex_pi_state *pi_state; if (likely(current->pi_state_cache)) return 0; pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL); if (!pi_state) return -ENOMEM; INIT_LIST_HEAD(&pi_state->list); pi_state->owner = NULL; atomic_set(&pi_state->refcount, 1); pi_state->key = FUTEX_KEY_INIT; current->pi_state_cache = pi_state; return 0; } static struct futex_pi_state * alloc_pi_state(void) { struct futex_pi_state *pi_state = current->pi_state_cache; WARN_ON(!pi_state); current->pi_state_cache = NULL; return pi_state; } static void free_pi_state(struct futex_pi_state *pi_state) { if (!atomic_dec_and_test(&pi_state->refcount)) return; if (pi_state->owner) { raw_spin_lock_irq(&pi_state->owner->pi_lock); list_del_init(&pi_state->list); raw_spin_unlock_irq(&pi_state->owner->pi_lock); rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner); } if (current->pi_state_cache) kfree(pi_state); else { pi_state->owner = NULL; atomic_set(&pi_state->refcount, 1); current->pi_state_cache = pi_state; } } static struct task_struct * futex_find_get_task(pid_t pid) { struct task_struct *p; rcu_read_lock(); p = find_task_by_vpid(pid); if (p) get_task_struct(p); rcu_read_unlock(); return p; } void exit_pi_state_list(struct task_struct *curr) { struct list_head *next, *head = &curr->pi_state_list; struct futex_pi_state *pi_state; struct futex_hash_bucket *hb; union futex_key key = FUTEX_KEY_INIT; if (!futex_cmpxchg_enabled) return; raw_spin_lock_irq(&curr->pi_lock); while (!list_empty(head)) { next = head->next; pi_state = list_entry(next, struct futex_pi_state, list); key = pi_state->key; hb = hash_futex(&key); raw_spin_unlock_irq(&curr->pi_lock); spin_lock(&hb->lock); raw_spin_lock_irq(&curr->pi_lock); if (head->next != next) { spin_unlock(&hb->lock); continue; } WARN_ON(pi_state->owner != curr); WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); pi_state->owner = NULL; raw_spin_unlock_irq(&curr->pi_lock); rt_mutex_unlock(&pi_state->pi_mutex); spin_unlock(&hb->lock); raw_spin_lock_irq(&curr->pi_lock); } raw_spin_unlock_irq(&curr->pi_lock); } /* * We need to check the following states: * * Waiter | pi_state | pi->owner | uTID | uODIED | ? * * [1] NULL | --- | --- | 0 | 0/1 | Valid * [2] NULL | --- | --- | >0 | 0/1 | Valid * * [3] Found | NULL | -- | Any | 0/1 | Invalid * * [4] Found | Found | NULL | 0 | 1 | Valid * [5] Found | Found | NULL | >0 | 1 | Invalid * * [6] Found | Found | task | 0 | 1 | Valid * * [7] Found | Found | NULL | Any | 0 | Invalid * * [8] Found | Found | task | ==taskTID | 0/1 | Valid * [9] Found | Found | task | 0 | 0 | Invalid * [10] Found | Found | task | !=taskTID | 0/1 | Invalid * * [1] Indicates that the kernel can acquire the futex atomically. We * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. * * [2] Valid, if TID does not belong to a kernel thread. If no matching * thread is found then it indicates that the owner TID has died. * * [3] Invalid. The waiter is queued on a non PI futex * * [4] Valid state after exit_robust_list(), which sets the user space * value to FUTEX_WAITERS | FUTEX_OWNER_DIED. * * [5] The user space value got manipulated between exit_robust_list() * and exit_pi_state_list() * * [6] Valid state after exit_pi_state_list() which sets the new owner in * the pi_state but cannot access the user space value. * * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. * * [8] Owner and user space value match * * [9] There is no transient state which sets the user space TID to 0 * except exit_robust_list(), but this is indicated by the * FUTEX_OWNER_DIED bit. See [4] * * [10] There is no transient state which leaves owner and user space * TID out of sync. */ static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, union futex_key *key, struct futex_pi_state **ps, struct task_struct *task) { struct futex_pi_state *pi_state = NULL; struct futex_q *this, *next; struct plist_head *head; struct task_struct *p; pid_t pid = uval & FUTEX_TID_MASK; head = &hb->chain; plist_for_each_entry_safe(this, next, head, list) { if (match_futex(&this->key, key)) { /* * Sanity check the waiter before increasing * the refcount and attaching to it. */ pi_state = this->pi_state; /* * Userspace might have messed up non-PI and * PI futexes [3] */ if (unlikely(!pi_state)) return -EINVAL; WARN_ON(!atomic_read(&pi_state->refcount)); /* * Handle the owner died case: */ if (uval & FUTEX_OWNER_DIED) { /* * exit_pi_state_list sets owner to NULL and * wakes the topmost waiter. The task which * acquires the pi_state->rt_mutex will fixup * owner. */ if (!pi_state->owner) { /* * No pi state owner, but the user * space TID is not 0. Inconsistent * state. [5] */ if (pid) return -EINVAL; /* * Take a ref on the state and * return. [4] */ goto out_state; } /* * If TID is 0, then either the dying owner * has not yet executed exit_pi_state_list() * or some waiter acquired the rtmutex in the * pi state, but did not yet fixup the TID in * user space. * * Take a ref on the state and return. [6] */ if (!pid) goto out_state; } else { /* * If the owner died bit is not set, * then the pi_state must have an * owner. [7] */ if (!pi_state->owner) return -EINVAL; } /* * Bail out if user space manipulated the * futex value. If pi state exists then the * owner TID must be the same as the user * space TID. [9/10] */ if (pid != task_pid_vnr(pi_state->owner)) return -EINVAL; /* * Protect against a corrupted uval. If uval * is 0x80000000 then pid is 0 and the waiter * bit is set. So the deadlock check in the * calling code has failed and we did not fall * into the check above due to !pid. */ if (task && pi_state->owner == task) return -EDEADLK; out_state: atomic_inc(&pi_state->refcount); *ps = pi_state; return 0; } } /* * We are the first waiter - try to look up the real owner and attach * the new pi_state to it, but bail out when TID = 0 [1] */ if (!pid) return -ESRCH; p = futex_find_get_task(pid); if (!p) return -ESRCH; if (!p->mm) { put_task_struct(p); return -EPERM; } /* * We need to look at the task state flags to figure out, * whether the task is exiting. To protect against the do_exit * change of the task flags, we do this protected by * p->pi_lock: */ raw_spin_lock_irq(&p->pi_lock); if (unlikely(p->flags & PF_EXITING)) { int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN; raw_spin_unlock_irq(&p->pi_lock); put_task_struct(p); return ret; } /* * No existing pi state. First waiter. [2] */ pi_state = alloc_pi_state(); rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); pi_state->key = *key; WARN_ON(!list_empty(&pi_state->list)); list_add(&pi_state->list, &p->pi_state_list); pi_state->owner = p; raw_spin_unlock_irq(&p->pi_lock); put_task_struct(p); *ps = pi_state; return 0; } static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, union futex_key *key, struct futex_pi_state **ps, struct task_struct *task, int set_waiters) { int lock_taken, ret, ownerdied = 0; u32 uval, newval, curval, vpid = task_pid_vnr(task); retry: ret = lock_taken = 0; newval = vpid; if (set_waiters) newval |= FUTEX_WAITERS; if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval))) return -EFAULT; if ((unlikely((curval & FUTEX_TID_MASK) == vpid))) return -EDEADLK; /* * Surprise - we got the lock, but we do not trust user space at all. */ if (unlikely(!curval)) { /* * We verify whether there is kernel state for this * futex. If not, we can safely assume, that the 0 -> * TID transition is correct. If state exists, we do * not bother to fixup the user space state as it was * corrupted already. */ return futex_top_waiter(hb, key) ? -EINVAL : 1; } uval = curval; newval = curval | FUTEX_WAITERS; if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { newval = (curval & ~FUTEX_TID_MASK) | vpid; ownerdied = 0; lock_taken = 1; } if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) return -EFAULT; if (unlikely(curval != uval)) goto retry; if (unlikely(lock_taken)) return 1; /* * We dont have the lock. Look up the PI state (or create it if * we are the first waiter): */ ret = lookup_pi_state(uval, hb, key, ps, task); if (unlikely(ret)) { switch (ret) { case -ESRCH: if (get_futex_value_locked(&curval, uaddr)) return -EFAULT; if (curval & FUTEX_OWNER_DIED) { ownerdied = 1; goto retry; } default: break; } } return ret; } static void __unqueue_futex(struct futex_q *q) { struct futex_hash_bucket *hb; if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr)) || WARN_ON(plist_node_empty(&q->list))) return; hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); plist_del(&q->list, &hb->chain); } static void wake_futex(struct futex_q *q) { struct task_struct *p = q->task; get_task_struct(p); __unqueue_futex(q); /* * The waiting task can free the futex_q as soon as * q->lock_ptr = NULL is written, without taking any locks. A * memory barrier is required here to prevent the following * store to lock_ptr from getting ahead of the plist_del. */ smp_wmb(); q->lock_ptr = NULL; wake_up_state(p, TASK_NORMAL); put_task_struct(p); } static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) { struct task_struct *new_owner; struct futex_pi_state *pi_state = this->pi_state; u32 uninitialized_var(curval), newval; int ret = 0; if (!pi_state) return -EINVAL; if (pi_state->owner != current) return -EINVAL; raw_spin_lock(&pi_state->pi_mutex.wait_lock); new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); if (!new_owner) new_owner = this->task; /* * We pass it to the next owner. The WAITERS bit is always * kept enabled while there is PI state around. We cleanup the * owner died bit, because we are the owner. */ newval = FUTEX_WAITERS | task_pid_vnr(new_owner); if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) ret = -EFAULT; else if (curval != uval) ret = -EINVAL; if (ret) { raw_spin_unlock(&pi_state->pi_mutex.wait_lock); return ret; } raw_spin_lock_irq(&pi_state->owner->pi_lock); WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); raw_spin_unlock_irq(&pi_state->owner->pi_lock); raw_spin_lock_irq(&new_owner->pi_lock); WARN_ON(!list_empty(&pi_state->list)); list_add(&pi_state->list, &new_owner->pi_state_list); pi_state->owner = new_owner; raw_spin_unlock_irq(&new_owner->pi_lock); raw_spin_unlock(&pi_state->pi_mutex.wait_lock); rt_mutex_unlock(&pi_state->pi_mutex); return 0; } static int unlock_futex_pi(u32 __user *uaddr, u32 uval) { u32 uninitialized_var(oldval); if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0)) return -EFAULT; if (oldval != uval) return -EAGAIN; return 0; } static inline void double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) { if (hb1 <= hb2) { spin_lock(&hb1->lock); if (hb1 < hb2) spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING); } else { spin_lock(&hb2->lock); spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING); } } static inline void double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) { spin_unlock(&hb1->lock); if (hb1 != hb2) spin_unlock(&hb2->lock); } static int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) { struct futex_hash_bucket *hb; struct futex_q *this, *next; struct plist_head *head; union futex_key key = FUTEX_KEY_INIT; int ret; if (!bitset) return -EINVAL; ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ); if (unlikely(ret != 0)) goto out; hb = hash_futex(&key); spin_lock(&hb->lock); head = &hb->chain; plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key)) { if (this->pi_state || this->rt_waiter) { ret = -EINVAL; break; } if (!(this->bitset & bitset)) continue; wake_futex(this); if (++ret >= nr_wake) break; } } spin_unlock(&hb->lock); put_futex_key(&key); out: return ret; } static int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int nr_wake, int nr_wake2, int op) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; struct futex_hash_bucket *hb1, *hb2; struct plist_head *head; struct futex_q *this, *next; int ret, op_ret; retry: ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) goto out_put_key1; hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); retry_private: double_lock_hb(hb1, hb2); op_ret = futex_atomic_op_inuser(op, uaddr2); if (unlikely(op_ret < 0)) { double_unlock_hb(hb1, hb2); #ifndef CONFIG_MMU ret = op_ret; goto out_put_keys; #endif if (unlikely(op_ret != -EFAULT)) { ret = op_ret; goto out_put_keys; } ret = fault_in_user_writeable(uaddr2); if (ret) goto out_put_keys; if (!(flags & FLAGS_SHARED)) goto retry_private; put_futex_key(&key2); put_futex_key(&key1); goto retry; } head = &hb1->chain; plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key1)) { wake_futex(this); if (++ret >= nr_wake) break; } } if (op_ret > 0) { head = &hb2->chain; op_ret = 0; plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key2)) { wake_futex(this); if (++op_ret >= nr_wake2) break; } } ret += op_ret; } double_unlock_hb(hb1, hb2); out_put_keys: put_futex_key(&key2); out_put_key1: put_futex_key(&key1); out: return ret; } static inline void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2, union futex_key *key2) { if (likely(&hb1->chain != &hb2->chain)) { plist_del(&q->list, &hb1->chain); plist_add(&q->list, &hb2->chain); q->lock_ptr = &hb2->lock; } get_futex_key_refs(key2); q->key = *key2; } static inline void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, struct futex_hash_bucket *hb) { get_futex_key_refs(key); q->key = *key; __unqueue_futex(q); WARN_ON(!q->rt_waiter); q->rt_waiter = NULL; q->lock_ptr = &hb->lock; wake_up_state(q->task, TASK_NORMAL); } /** * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter * @pifutex: the user address of the to futex * @hb1: the from futex hash bucket, must be locked by the caller * @hb2: the to futex hash bucket, must be locked by the caller * @key1: the from futex key * @key2: the to futex key * @ps: address to store the pi_state pointer * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) * * Try and get the lock on behalf of the top waiter if we can do it atomically. * Wake the top waiter if we succeed. If the caller specified set_waiters, * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. * hb1 and hb2 must be held by the caller. * * Return: * 0 - failed to acquire the lock atomically; * >0 - acquired the lock, return value is vpid of the top_waiter * <0 - error */ static int futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2, union futex_key *key1, union futex_key *key2, struct futex_pi_state **ps, int set_waiters) { struct futex_q *top_waiter = NULL; u32 curval; int ret, vpid; if (get_futex_value_locked(&curval, pifutex)) return -EFAULT; top_waiter = futex_top_waiter(hb1, key1); if (!top_waiter) return 0; if (!match_futex(top_waiter->requeue_pi_key, key2)) return -EINVAL; /* * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in * the contended case or if set_waiters is 1. The pi_state is returned * in ps in contended cases. */ vpid = task_pid_vnr(top_waiter->task); ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, set_waiters); if (ret == 1) { requeue_pi_wake_futex(top_waiter, key2, hb2); return vpid; } return ret; } static int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; int drop_count = 0, task_count = 0, ret; struct futex_pi_state *pi_state = NULL; struct futex_hash_bucket *hb1, *hb2; struct plist_head *head1; struct futex_q *this, *next; if (requeue_pi) { /* * Requeue PI only works on two distinct uaddrs. This * check is only valid for private futexes. See below. */ if (uaddr1 == uaddr2) return -EINVAL; /* * requeue_pi requires a pi_state, try to allocate it now * without any locks in case it fails. */ if (refill_pi_state_cache()) return -ENOMEM; if (nr_wake != 1) return -EINVAL; } retry: if (pi_state != NULL) { free_pi_state(pi_state); pi_state = NULL; } ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, requeue_pi ? VERIFY_WRITE : VERIFY_READ); if (unlikely(ret != 0)) goto out_put_key1; /* * The check above which compares uaddrs is not sufficient for * shared futexes. We need to compare the keys: */ if (requeue_pi && match_futex(&key1, &key2)) { ret = -EINVAL; goto out_put_keys; } hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); retry_private: double_lock_hb(hb1, hb2); if (likely(cmpval != NULL)) { u32 curval; ret = get_futex_value_locked(&curval, uaddr1); if (unlikely(ret)) { double_unlock_hb(hb1, hb2); ret = get_user(curval, uaddr1); if (ret) goto out_put_keys; if (!(flags & FLAGS_SHARED)) goto retry_private; put_futex_key(&key2); put_futex_key(&key1); goto retry; } if (curval != *cmpval) { ret = -EAGAIN; goto out_unlock; } } if (requeue_pi && (task_count - nr_wake < nr_requeue)) { ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, &key2, &pi_state, nr_requeue); /* * At this point the top_waiter has either taken uaddr2 or is * waiting on it. If the former, then the pi_state will not * exist yet, look it up one more time to ensure we have a * reference to it. If the lock was taken, ret contains the * vpid of the top waiter task. */ if (ret > 0) { WARN_ON(pi_state); drop_count++; task_count++; /* * If we acquired the lock, then the user * space value of uaddr2 should be vpid. It * cannot be changed by the top waiter as it * is blocked on hb2 lock if it tries to do * so. If something fiddled with it behind our * back the pi state lookup might unearth * it. So we rather use the known value than * rereading and handing potential crap to * lookup_pi_state. */ ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL); } switch (ret) { case 0: break; case -EFAULT: double_unlock_hb(hb1, hb2); put_futex_key(&key2); put_futex_key(&key1); ret = fault_in_user_writeable(uaddr2); if (!ret) goto retry; goto out; case -EAGAIN: double_unlock_hb(hb1, hb2); put_futex_key(&key2); put_futex_key(&key1); cond_resched(); goto retry; default: goto out_unlock; } } head1 = &hb1->chain; plist_for_each_entry_safe(this, next, head1, list) { if (task_count - nr_wake >= nr_requeue) break; if (!match_futex(&this->key, &key1)) continue; if ((requeue_pi && !this->rt_waiter) || (!requeue_pi && this->rt_waiter)) { ret = -EINVAL; break; } if (++task_count <= nr_wake && !requeue_pi) { wake_futex(this); continue; } if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) { ret = -EINVAL; break; } if (requeue_pi) { atomic_inc(&pi_state->refcount); this->pi_state = pi_state; ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, this->rt_waiter, this->task, 1); if (ret == 1) { requeue_pi_wake_futex(this, &key2, hb2); drop_count++; continue; } else if (ret) { this->pi_state = NULL; free_pi_state(pi_state); goto out_unlock; } } requeue_futex(this, hb1, hb2, &key2); drop_count++; } out_unlock: double_unlock_hb(hb1, hb2); while (--drop_count >= 0) drop_futex_key_refs(&key1); out_put_keys: put_futex_key(&key2); out_put_key1: put_futex_key(&key1); out: if (pi_state != NULL) free_pi_state(pi_state); return ret ? ret : task_count; } static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) __acquires(&hb->lock) { struct futex_hash_bucket *hb; hb = hash_futex(&q->key); q->lock_ptr = &hb->lock; spin_lock(&hb->lock); return hb; } static inline void queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) __releases(&hb->lock) { spin_unlock(&hb->lock); } static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) __releases(&hb->lock) { int prio; prio = min(current->normal_prio, MAX_RT_PRIO); plist_node_init(&q->list, prio); plist_add(&q->list, &hb->chain); q->task = current; spin_unlock(&hb->lock); } static int unqueue_me(struct futex_q *q) { spinlock_t *lock_ptr; int ret = 0; retry: lock_ptr = q->lock_ptr; barrier(); if (lock_ptr != NULL) { spin_lock(lock_ptr); if (unlikely(lock_ptr != q->lock_ptr)) { spin_unlock(lock_ptr); goto retry; } __unqueue_futex(q); BUG_ON(q->pi_state); spin_unlock(lock_ptr); ret = 1; } drop_futex_key_refs(&q->key); return ret; } static void unqueue_me_pi(struct futex_q *q) __releases(q->lock_ptr) { __unqueue_futex(q); BUG_ON(!q->pi_state); free_pi_state(q->pi_state); q->pi_state = NULL; spin_unlock(q->lock_ptr); } static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, struct task_struct *newowner) { u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; struct task_struct *oldowner = pi_state->owner; u32 uval, uninitialized_var(curval), newval; int ret; if (!pi_state->owner) newtid |= FUTEX_OWNER_DIED; retry: if (get_futex_value_locked(&uval, uaddr)) goto handle_fault; while (1) { newval = (uval & FUTEX_OWNER_DIED) | newtid; if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) goto handle_fault; if (curval == uval) break; uval = curval; } if (pi_state->owner != NULL) { raw_spin_lock_irq(&pi_state->owner->pi_lock); WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); raw_spin_unlock_irq(&pi_state->owner->pi_lock); } pi_state->owner = newowner; raw_spin_lock_irq(&newowner->pi_lock); WARN_ON(!list_empty(&pi_state->list)); list_add(&pi_state->list, &newowner->pi_state_list); raw_spin_unlock_irq(&newowner->pi_lock); return 0; handle_fault: spin_unlock(q->lock_ptr); ret = fault_in_user_writeable(uaddr); spin_lock(q->lock_ptr); if (pi_state->owner != oldowner) return 0; if (ret) return ret; goto retry; } static long futex_wait_restart(struct restart_block *restart); static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) { struct task_struct *owner; int ret = 0; if (locked) { if (q->pi_state->owner != current) ret = fixup_pi_state_owner(uaddr, q, current); goto out; } if (q->pi_state->owner == current) { if (rt_mutex_trylock(&q->pi_state->pi_mutex)) { locked = 1; goto out; } raw_spin_lock(&q->pi_state->pi_mutex.wait_lock); owner = rt_mutex_owner(&q->pi_state->pi_mutex); if (!owner) owner = rt_mutex_next_owner(&q->pi_state->pi_mutex); raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock); ret = fixup_pi_state_owner(uaddr, q, owner); goto out; } if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " "pi-state %p\n", ret, q->pi_state->pi_mutex.owner, q->pi_state->owner); out: return ret ? ret : locked; } static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, struct hrtimer_sleeper *timeout) { set_current_state(TASK_INTERRUPTIBLE); queue_me(q, hb); if (timeout) { hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); if (!hrtimer_active(&timeout->timer)) timeout->task = NULL; } if (likely(!plist_node_empty(&q->list))) { if (!timeout || timeout->task) freezable_schedule(); } __set_current_state(TASK_RUNNING); } static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, struct futex_q *q, struct futex_hash_bucket **hb) { u32 uval; int ret; retry: ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ); if (unlikely(ret != 0)) return ret; retry_private: *hb = queue_lock(q); ret = get_futex_value_locked(&uval, uaddr); if (ret) { queue_unlock(q, *hb); ret = get_user(uval, uaddr); if (ret) goto out; if (!(flags & FLAGS_SHARED)) goto retry_private; put_futex_key(&q->key); goto retry; } if (uval != val) { queue_unlock(q, *hb); ret = -EWOULDBLOCK; } out: if (ret) put_futex_key(&q->key); return ret; } static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) { struct hrtimer_sleeper timeout, *to = NULL; struct restart_block *restart; struct futex_hash_bucket *hb; struct futex_q q = futex_q_init; int ret; if (!bitset) return -EINVAL; q.bitset = bitset; if (abs_time) { to = &timeout; hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? CLOCK_REALTIME : CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); hrtimer_set_expires_range_ns(&to->timer, *abs_time, task_get_effective_timer_slack(current)); } retry: ret = futex_wait_setup(uaddr, val, flags, &q, &hb); if (ret) goto out; futex_wait_queue_me(hb, &q, to); ret = 0; if (!unqueue_me(&q)) goto out; ret = -ETIMEDOUT; if (to && !to->task) goto out; if (!signal_pending(current)) goto retry; ret = -ERESTARTSYS; if (!abs_time) goto out; restart = ¤t_thread_info()->restart_block; restart->fn = futex_wait_restart; restart->futex.uaddr = uaddr; restart->futex.val = val; restart->futex.time = abs_time->tv64; restart->futex.bitset = bitset; restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; ret = -ERESTART_RESTARTBLOCK; out: if (to) { hrtimer_cancel(&to->timer); destroy_hrtimer_on_stack(&to->timer); } return ret; } static long futex_wait_restart(struct restart_block *restart) { u32 __user *uaddr = restart->futex.uaddr; ktime_t t, *tp = NULL; if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { t.tv64 = restart->futex.time; tp = &t; } restart->fn = do_no_restart_syscall; return (long)futex_wait(uaddr, restart->futex.flags, restart->futex.val, tp, restart->futex.bitset); } static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to = NULL; struct futex_hash_bucket *hb; struct futex_q q = futex_q_init; int res, ret; if (refill_pi_state_cache()) return -ENOMEM; if (time) { to = &timeout; hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); hrtimer_set_expires(&to->timer, *time); } retry: ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; retry_private: hb = queue_lock(&q); ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); if (unlikely(ret)) { switch (ret) { case 1: ret = 0; goto out_unlock_put_key; case -EFAULT: goto uaddr_faulted; case -EAGAIN: queue_unlock(&q, hb); put_futex_key(&q.key); cond_resched(); goto retry; default: goto out_unlock_put_key; } } queue_me(&q, hb); WARN_ON(!q.pi_state); if (!trylock) ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1); else { ret = rt_mutex_trylock(&q.pi_state->pi_mutex); ret = ret ? 0 : -EWOULDBLOCK; } spin_lock(q.lock_ptr); res = fixup_owner(uaddr, &q, !ret); if (res) ret = (res < 0) ? res : 0; if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) rt_mutex_unlock(&q.pi_state->pi_mutex); unqueue_me_pi(&q); goto out_put_key; out_unlock_put_key: queue_unlock(&q, hb); out_put_key: put_futex_key(&q.key); out: if (to) destroy_hrtimer_on_stack(&to->timer); return ret != -EINTR ? ret : -ERESTARTNOINTR; uaddr_faulted: queue_unlock(&q, hb); ret = fault_in_user_writeable(uaddr); if (ret) goto out_put_key; if (!(flags & FLAGS_SHARED)) goto retry_private; put_futex_key(&q.key); goto retry; } static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) { struct futex_hash_bucket *hb; struct futex_q *this, *next; struct plist_head *head; union futex_key key = FUTEX_KEY_INIT; u32 uval, vpid = task_pid_vnr(current); int ret; retry: if (get_user(uval, uaddr)) return -EFAULT; if ((uval & FUTEX_TID_MASK) != vpid) return -EPERM; ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; hb = hash_futex(&key); spin_lock(&hb->lock); /* * To avoid races, try to do the TID -> 0 atomic transition * again. If it succeeds then we can return without waking * anyone else up. We only try this if neither the waiters nor * the owner died bit are set. */ if (!(uval & ~FUTEX_TID_MASK) && cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0)) goto pi_faulted; if (unlikely(uval == vpid)) goto out_unlock; head = &hb->chain; plist_for_each_entry_safe(this, next, head, list) { if (!match_futex (&this->key, &key)) continue; ret = wake_futex_pi(uaddr, uval, this); if (ret == -EFAULT) goto pi_faulted; goto out_unlock; } /* * No waiters - kernel unlocks the futex: */ ret = unlock_futex_pi(uaddr, uval); if (ret == -EFAULT) goto pi_faulted; out_unlock: spin_unlock(&hb->lock); put_futex_key(&key); out: return ret; pi_faulted: spin_unlock(&hb->lock); put_futex_key(&key); ret = fault_in_user_writeable(uaddr); if (!ret) goto retry; return ret; } static inline int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, struct futex_q *q, union futex_key *key2, struct hrtimer_sleeper *timeout) { int ret = 0; if (!match_futex(&q->key, key2)) { WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr)); plist_del(&q->list, &hb->chain); ret = -EWOULDBLOCK; if (timeout && !timeout->task) ret = -ETIMEDOUT; else if (signal_pending(current)) ret = -ERESTARTNOINTR; } return ret; } static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset, u32 __user *uaddr2) { struct hrtimer_sleeper timeout, *to = NULL; struct rt_mutex_waiter rt_waiter; struct rt_mutex *pi_mutex = NULL; struct futex_hash_bucket *hb; union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; int res, ret; if (uaddr == uaddr2) return -EINVAL; if (!bitset) return -EINVAL; if (abs_time) { to = &timeout; hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? CLOCK_REALTIME : CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_init_sleeper(to, current); hrtimer_set_expires_range_ns(&to->timer, *abs_time, task_get_effective_timer_slack(current)); } debug_rt_mutex_init_waiter(&rt_waiter); rt_waiter.task = NULL; ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; q.bitset = bitset; q.rt_waiter = &rt_waiter; q.requeue_pi_key = &key2; ret = futex_wait_setup(uaddr, val, flags, &q, &hb); if (ret) goto out_key2; /* * The check above which compares uaddrs is not sufficient for * shared futexes. We need to compare the keys: */ if (match_futex(&q.key, &key2)) { ret = -EINVAL; goto out_put_keys; } /* Queue the futex_q, drop the hb lock, wait for wakeup. */ futex_wait_queue_me(hb, &q, to); spin_lock(&hb->lock); ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); spin_unlock(&hb->lock); if (ret) goto out_put_keys; if (!q.rt_waiter) { if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); spin_unlock(q.lock_ptr); } } else { WARN_ON(!q.pi_state); pi_mutex = &q.pi_state->pi_mutex; ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); debug_rt_mutex_free_waiter(&rt_waiter); spin_lock(q.lock_ptr); res = fixup_owner(uaddr2, &q, !ret); if (res) ret = (res < 0) ? res : 0; unqueue_me_pi(&q); } if (ret == -EFAULT) { if (pi_mutex && rt_mutex_owner(pi_mutex) == current) rt_mutex_unlock(pi_mutex); } else if (ret == -EINTR) { ret = -EWOULDBLOCK; } out_put_keys: put_futex_key(&q.key); out_key2: put_futex_key(&key2); out: if (to) { hrtimer_cancel(&to->timer); destroy_hrtimer_on_stack(&to->timer); } return ret; } SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, size_t, len) { if (!futex_cmpxchg_enabled) return -ENOSYS; if (unlikely(len != sizeof(*head))) return -EINVAL; current->robust_list = head; return 0; } SYSCALL_DEFINE3(get_robust_list, int, pid, struct robust_list_head __user * __user *, head_ptr, size_t __user *, len_ptr) { struct robust_list_head __user *head; unsigned long ret; struct task_struct *p; if (!futex_cmpxchg_enabled) return -ENOSYS; WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); rcu_read_lock(); ret = -ESRCH; if (!pid) p = current; else { p = find_task_by_vpid(pid); if (!p) goto err_unlock; } ret = -EPERM; if (!ptrace_may_access(p, PTRACE_MODE_READ)) goto err_unlock; head = p->robust_list; rcu_read_unlock(); if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(head, head_ptr); err_unlock: rcu_read_unlock(); return ret; } int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) { u32 uval, uninitialized_var(nval), mval; retry: if (get_user(uval, uaddr)) return -1; if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) { mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) { if (fault_in_user_writeable(uaddr)) return -1; goto retry; } if (nval != uval) goto retry; if (!pi && (uval & FUTEX_WAITERS)) futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); } return 0; } static inline int fetch_robust_entry(struct robust_list __user **entry, struct robust_list __user * __user *head, unsigned int *pi) { unsigned long uentry; if (get_user(uentry, (unsigned long __user *)head)) return -EFAULT; *entry = (void __user *)(uentry & ~1UL); *pi = uentry & 1; return 0; } void exit_robust_list(struct task_struct *curr) { struct robust_list_head __user *head = curr->robust_list; struct robust_list __user *entry, *next_entry, *pending; unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; unsigned int uninitialized_var(next_pi); unsigned long futex_offset; int rc; if (!futex_cmpxchg_enabled) return; if (fetch_robust_entry(&entry, &head->list.next, &pi)) return; if (get_user(futex_offset, &head->futex_offset)) return; if (fetch_robust_entry(&pending, &head->list_op_pending, &pip)) return; next_entry = NULL; while (entry != &head->list) { rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi); if (entry != pending) if (handle_futex_death((void __user *)entry + futex_offset, curr, pi)) return; if (rc) return; entry = next_entry; pi = next_pi; if (!--limit) break; cond_resched(); } if (pending) handle_futex_death((void __user *)pending + futex_offset, curr, pip); } long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) { int cmd = op & FUTEX_CMD_MASK; unsigned int flags = 0; if (!(op & FUTEX_PRIVATE_FLAG)) flags |= FLAGS_SHARED; if (op & FUTEX_CLOCK_REALTIME) { flags |= FLAGS_CLOCKRT; if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) return -ENOSYS; } switch (cmd) { case FUTEX_LOCK_PI: case FUTEX_UNLOCK_PI: case FUTEX_TRYLOCK_PI: case FUTEX_WAIT_REQUEUE_PI: case FUTEX_CMP_REQUEUE_PI: if (!futex_cmpxchg_enabled) return -ENOSYS; } switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; case FUTEX_WAIT_BITSET: return futex_wait(uaddr, flags, val, timeout, val3); case FUTEX_WAKE: val3 = FUTEX_BITSET_MATCH_ANY; case FUTEX_WAKE_BITSET: return futex_wake(uaddr, flags, val, val3); case FUTEX_REQUEUE: return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0); case FUTEX_CMP_REQUEUE: return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0); case FUTEX_WAKE_OP: return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); case FUTEX_LOCK_PI: return futex_lock_pi(uaddr, flags, val, timeout, 0); case FUTEX_UNLOCK_PI: return futex_unlock_pi(uaddr, flags); case FUTEX_TRYLOCK_PI: return futex_lock_pi(uaddr, flags, 0, timeout, 1); case FUTEX_WAIT_REQUEUE_PI: val3 = FUTEX_BITSET_MATCH_ANY; return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, uaddr2); case FUTEX_CMP_REQUEUE_PI: return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1); } return -ENOSYS; } SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, struct timespec __user *, utime, u32 __user *, uaddr2, u32, val3) { struct timespec ts; ktime_t t, *tp = NULL; u32 val2 = 0; int cmd = op & FUTEX_CMD_MASK; if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || cmd == FUTEX_WAIT_BITSET || cmd == FUTEX_WAIT_REQUEUE_PI)) { if (copy_from_user(&ts, utime, sizeof(ts)) != 0) return -EFAULT; if (!timespec_valid(&ts)) return -EINVAL; t = timespec_to_ktime(ts); if (cmd == FUTEX_WAIT) t = ktime_add_safe(ktime_get(), t); tp = &t; } if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) val2 = (u32) (unsigned long) utime; return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); } static int __init futex_init(void) { u32 curval; int i; if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) futex_cmpxchg_enabled = 1; for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { plist_head_init(&futex_queues[i].chain); spin_lock_init(&futex_queues[i].lock); } return 0; } __initcall(futex_init);