Discussion:
[PATCH 6/7] simplewait: don't run a possibly infinite number of wakes under raw lock
Paul Gortmaker
2014-10-18 00:23:01 UTC
The simple wait queues use a raw lock in order to be functional
for the preempt-rt kernels. PeterZ suggested[1] the following
change to ensure we come up for air now and again in order to be
deterministic.

I'm not really in love with the solution of passing the flags around,
but couldn't think of anything cleaner to achieve the same thing.

[1] http://marc.info/?l=linux-kernel&m=138089860308430
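
As a minimal illustration of the idea (not the patch itself -- the
waiter field names below are assumptions about the swait internals),
the loop wakes one waiter at a time and, whenever a reschedule is
pending, briefly drops and re-takes the raw lock so the time spent
with interrupts off stays bounded:

static void swake_all_sketch(struct swait_head *q, unsigned long *flags)
{
	struct swait *curr, *next;

	/* called with q->lock held, IRQ state saved in *flags */
	list_for_each_entry_safe(curr, next, &q->task_list, node) {
		wake_up_state(curr->task, TASK_NORMAL);

		if (need_resched()) {
			/* open a window for IRQs/preemption, then carry on */
			raw_spin_unlock_irqrestore(&q->lock, *flags);
			raw_spin_lock_irqsave(&q->lock, *flags);
		}
	}
}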

Cc: Peter Zijlstra <***@infradead.org>
Signed-off-by: Paul Gortmaker <***@windriver.com>

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2a57e00250f9..46e2591c22b6 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -262,7 +262,7 @@ void __swake_up(struct swait_head *q, unsigned int mode, int nr);
void __cwake_up_locked_key(struct cwait_head *q, unsigned int mode, void *key);
void __cwake_up_sync_key(struct cwait_head *q, unsigned int mode, int nr, void *key);
void __cwake_up_locked(struct cwait_head *q, unsigned int mode, int nr);
-void __swake_up_locked(struct swait_head *q, unsigned int mode, int nr);
+void __swake_up_locked(struct swait_head *q, unsigned int mode, int nr, unsigned long *flags);
void __cwake_up_sync(struct cwait_head *q, unsigned int mode, int nr);
void __cwake_up_bit(struct cwait_head *, void *, int);
int __cwait_on_bit(struct cwait_head *, struct cwait_bit *, cwait_bit_action_f *, unsigned);
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 7a165c697956..87ef42158fdf 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -32,7 +32,7 @@ void complete(struct completion *x)

raw_spin_lock_irqsave(&x->wait.lock, flags);
x->done++;
- __swake_up_locked(&x->wait, TASK_NORMAL, 1);
+ __swake_up_locked(&x->wait, TASK_NORMAL, 1, &flags);
raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete);
@@ -52,7 +52,7 @@ void complete_all(struct completion *x)

raw_spin_lock_irqsave(&x->wait.lock, flags);
x->done += UINT_MAX/2;
- __swake_up_locked(&x->wait, TASK_NORMAL, 0);
+ __swake_up_locked(&x->wait, TASK_NORMAL, 0, &flags);
raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete_all);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 634427c25945..25e5886ed8d9 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -123,8 +123,12 @@ static void __cwake_up_common(struct cwait_head *q, unsigned int mode,
}
}

+/*
+ * The swait version gets the extra flags arg so that we can mitigate
+ * against a possibly large number of wakeups done under a raw lock.
+ */
static void __swake_up_common(struct swait_head *q, unsigned int mode,
- int nr_exclusive)
+ int nr_exclusive, unsigned long *flags)
{
struct swait *curr, *next;
int woken = 0;
@@ -146,6 +150,10 @@ static void __swake_up_common(struct swait_head *q, unsigned int mode,
break;
}

+ if (need_resched()) {
+ raw_spin_unlock_irqrestore(&q->lock, *flags);
+ raw_spin_lock_irqsave(&q->lock, *flags);
+ }
}
}

@@ -178,7 +186,7 @@ void __swake_up(struct swait_head *q, unsigned int mode, int nr_exclusive)
return;

raw_spin_lock_irqsave(&q->lock, flags);
- __swake_up_common(q, mode, nr_exclusive);
+ __swake_up_common(q, mode, nr_exclusive, &flags);
raw_spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(__swake_up);
@@ -192,12 +200,13 @@ void __cwake_up_locked(struct cwait_head *q, unsigned int mode, int nr)
}
EXPORT_SYMBOL_GPL(__cwake_up_locked);

-void __swake_up_locked(struct swait_head *q, unsigned int state, int nr)
+void __swake_up_locked(struct swait_head *q, unsigned int state, int nr,
+ unsigned long *flags)
{
if (!swait_active(q))
return;

- __swake_up_common(q, state, nr);
+ __swake_up_common(q, state, nr, flags);
}
EXPORT_SYMBOL_GPL(__swake_up_locked);
--
1.9.2
Paul Gortmaker
2014-10-18 00:22:56 UTC
The existing wait queue code supports custom callbacks and an
exclusive flag that can be used to limit the number of callbacks
executed. Most waiters do not need these two features, so we are
adding simple wait queue support that reduces overhead for users
that aren't using those features.
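
For reference, the two "complex" features in question look roughly
like the sketch below, using today's mainline names (the demo_*
identifiers are made up for illustration): a custom wake callback
installed with DEFINE_WAIT_FUNC(), and exclusive (wake-one) queueing
via prepare_to_wait_exclusive(). Most waiters use neither.

#include <linux/sched.h>
#include <linux/wait.h>

static int demo_wake_fn(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	/* do extra work on wakeup, then fall back to the default handling */
	return autoremove_wake_function(wait, mode, sync, key);
}

static void demo_wait_for(wait_queue_head_t *wq, bool *done)
{
	DEFINE_WAIT_FUNC(wait, demo_wake_fn);

	for (;;) {
		/* exclusive: wake_up() will wake at most one such waiter */
		prepare_to_wait_exclusive(wq, &wait, TASK_UNINTERRUPTIBLE);
		if (*done)
			break;
		schedule();
	}
	finish_wait(wq, &wait);
}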

The end goal is to explicitly distinguish between complex wait
and simple wait in the names of functions and structs. We avoid
re-using the old namespace like "add_wait_foo()", to ensure people
play an active role in choosing which variant they want to use.

In order to achieve this in an incremental way that preserves
bisection, avoids "flag day" type changes, and allows tree wide
changes to be done at convenient times, we will do the following:

1) rename existing structs and functions with an additional "c"
to indicate they are the complex variants [limited to wait.h]

2) introduce temporary wait_xyz() ----> cwait_xyz() mappings that will
let us do tree-wide conversions at our leisure (with coccinelle).
The mappings can be disabled with #undef CWAIT_COMPAT for testing;
a sketch of how that plays out follows this list.

3) update existing core implementation of complex wait functions in
kernel/sched/wait.c to have "c" prefix and hence not rely on #2

4) introduce simple wait support as swait_xyz() and friends into the
now prepared kernel/sched/wait.c and include/linux/wait.h files.

5) deploy swait support for a select initial set of subsystems,
like completions and RCU.
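
To make the #2 compat layer concrete (names as introduced in the diff
below; the demo_* identifiers are made up): a legacy caller keeps
compiling unchanged while CWAIT_COMPAT is defined, and turns into a
build error the moment it is undefined, which is how any stragglers
can be caught after the coccinelle pass.

#include <linux/wait.h>

/* expands to DECLARE_CWAIT_HEAD(demo_wq) through the compat defines */
static DECLARE_WAIT_QUEUE_HEAD(demo_wq);

static void demo_poke(void)
{
	wake_up(&demo_wq);	/* expands to cwake_up(&demo_wq) */
}

/*
 * With "#undef CWAIT_COMPAT" (or the define removed for testing), both
 * uses above fail to compile until converted to the cwait_/cwake_ names.
 */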

This commit achieves #1 and #2 in a single commit, as the two must be
paired together to ensure bisection is not broken.

Once the above are done, we will probably want to continue by:

a) Continue converting more cwait users over to swait on a per-subsystem
basis, for subsystems not really making use of the added functionality.

b) Use coccinelle to convert remaining implicit complex wait calls like
wait_ABC() into cwait_ABC() as an rc1 [quiescent] treewide change.

c) remove the temporary mappings added in #2 above, once there are
no more remaining ambiguous wait users w/o a "c" or "s" prefix.

d) Use coccinelle to remove existing wait_queue_t and wait_queue_head_t
typedef users, and delete the typedefs.

Note that the "queue" has been dropped from waiter names where
appropriate; it was confusing anyway, since the list head really
served as the actual "queue", while the list elements were just
individual waiters, not queues themselves. This helps shorten some
of the more cumbersome names like "__add_wait_queue_tail_exclusive()".

Signed-off-by: Paul Gortmaker <***@windriver.com>

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 6fb1ba5f9b2f..526e398cc249 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -9,35 +9,55 @@
#include <asm/current.h>
#include <uapi/linux/wait.h>

-typedef struct __wait_queue wait_queue_t;
-typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
-int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
+/*
+ * We are adding the distinction between complex wait queues with custom
+ * callbacks, and capped/exclusive number of wakes; vs simple wait queues
+ * that won't support either of those features. Enable temporary mapping
+ * of wait_foo --> cwait_foo for ease of transition. The define will
+ * enable ease of removal later, and allows a one-line change to enable
+ * testing of the coccinelle transformation tree-wide.
+ */
+#define CWAIT_COMPAT
+
+typedef struct cwait cwait_t;
+typedef int (*cwait_func_t)(cwait_t *wait, unsigned mode, int flags, void *key);
+int default_cwake_function(cwait_t *wait, unsigned mode, int flags, void *key);

-struct __wait_queue {
+struct cwait {
unsigned int flags;
#define WQ_FLAG_EXCLUSIVE 0x01
void *private;
- wait_queue_func_t func;
+ cwait_func_t func;
struct list_head task_list;
};

-struct wait_bit_key {
+struct cwait_bit_key {
void *flags;
int bit_nr;
-#define WAIT_ATOMIC_T_BIT_NR -1
+#define CWAIT_ATOMIC_T_BIT_NR -1
unsigned long private;
};

-struct wait_bit_queue {
- struct wait_bit_key key;
- wait_queue_t wait;
+struct cwait_bit {
+ struct cwait_bit_key key;
+ struct cwait wait;
};

-struct __wait_queue_head {
+struct cwait_head {
spinlock_t lock;
struct list_head task_list;
};
-typedef struct __wait_queue_head wait_queue_head_t;
+typedef struct cwait_head cwait_head_t;
+
+#ifdef CWAIT_COMPAT
+#define wait_queue_t cwait_t
+#define wait_queue_head_t cwait_head_t
+#define wait_queue_func_t cwait_func_t
+#define default_wake_function default_cwake_function
+#define wait_bit_key cwait_bit_key
+#define wait_bit_queue cwait_bit
+#define WAIT_ATOMIC_T_BIT_NR CWAIT_ATOMIC_T_BIT_NR
+#endif

struct task_struct;

@@ -45,70 +65,93 @@ struct task_struct;
* Macros for declaration and initialisaton of the datatypes
*/

-#define __WAITQUEUE_INITIALIZER(name, tsk) { \
+#define CWAIT_INITIALIZER(name, tsk) { \
.private = tsk, \
- .func = default_wake_function, \
+ .func = default_cwake_function, \
.task_list = { NULL, NULL } }

-#define DECLARE_WAITQUEUE(name, tsk) \
- wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
+#define DECLARE_CWAIT(name, tsk) \
+ struct cwait name = CWAIT_INITIALIZER(name, tsk)

-#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
+#define CWAIT_HEAD_INITIALIZER(name) { \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
.task_list = { &(name).task_list, &(name).task_list } }

-#define DECLARE_WAIT_QUEUE_HEAD(name) \
- wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
+#define DECLARE_CWAIT_HEAD(name) \
+ struct cwait_head name = CWAIT_HEAD_INITIALIZER(name)

-#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
+#define CWAIT_BIT_KEY_INITIALIZER(word, bit) \
{ .flags = word, .bit_nr = bit, }

-#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p) \
- { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, }
+#define CWAIT_ATOMIC_T_KEY_INITIALIZER(p) \
+ { .flags = p, .bit_nr = CWAIT_ATOMIC_T_BIT_NR, }

-extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *);
+extern void __init_cwait_head(struct cwait_head *q, const char *name,
+ struct lock_class_key *);

-#define init_waitqueue_head(q) \
+#define init_cwait_head(q) \
do { \
static struct lock_class_key __key; \
\
- __init_waitqueue_head((q), #q, &__key); \
+ __init_cwait_head((q), #q, &__key); \
} while (0)

#ifdef CONFIG_LOCKDEP
-# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
- ({ init_waitqueue_head(&name); name; })
-# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \
- wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name)
+# define CWAIT_HEAD_INIT_ONSTACK(name) \
+ ({ init_cwait_head(&name); name; })
+# define DECLARE_CWAIT_HEAD_ONSTACK(name) \
+ struct cwait_head name = CWAIT_HEAD_INIT_ONSTACK(name)
#else
-# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name)
+# define DECLARE_CWAIT_HEAD_ONSTACK(name) DECLARE_CWAIT_HEAD(name)
#endif

-static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
+static inline void init_cwait_entry(struct cwait *q, struct task_struct *p)
{
q->flags = 0;
q->private = p;
- q->func = default_wake_function;
+ q->func = default_cwake_function;
}

-static inline void
-init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
+static inline void init_cwait_func_entry(struct cwait *q, cwait_func_t func)
{
q->flags = 0;
q->private = NULL;
q->func = func;
}

-static inline int waitqueue_active(wait_queue_head_t *q)
+#ifdef CWAIT_COMPAT
+#define DECLARE_WAITQUEUE DECLARE_CWAIT
+#define __WAITQUEUE_INITIALIZER CWAIT_INITIALIZER
+#define DECLARE_WAIT_QUEUE_HEAD DECLARE_CWAIT_HEAD
+#define __WAIT_QUEUE_HEAD_INITIALIZER CWAIT_HEAD_INITIALIZER
+#define __WAIT_QUEUE_HEAD_INIT_ONSTACK CWAIT_HEAD_INIT_ONSTACK
+#define DECLARE_WAIT_QUEUE_HEAD_ONSTACK DECLARE_CWAIT_HEAD_ONSTACK
+#define __WAIT_BIT_KEY_INITIALIZER CWAIT_BIT_KEY_INITIALIZER
+#define __WAIT_ATOMIC_T_KEY_INITIALIZER CWAIT_ATOMIC_T_KEY_INITIALIZER
+
+#define __init_waitqueue_head __init_cwait_head
+#define init_waitqueue_head init_cwait_head
+#define init_waitqueue_entry init_cwait_entry
+#define init_waitqueue_func_entry init_cwait_func_entry
+#endif
+
+static inline int cwait_active(struct cwait_head *q)
{
return !list_empty(&q->task_list);
}

-extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
-extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait);
-extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
+extern void add_cwait(struct cwait_head *q, struct cwait *wait);
+extern void add_cwait_exclusive(struct cwait_head *q, struct cwait *wait);
+extern void remove_cwait(struct cwait_head *q, struct cwait *wait);
+
+#ifdef CWAIT_COMPAT
+#define waitqueue_active cwait_active
+#define add_wait_queue add_cwait
+#define add_wait_queue_exclusive add_cwait_exclusive
+#define remove_wait_queue remove_cwait
+#endif

-static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
+static inline void __add_cwait(struct cwait_head *head, struct cwait *new)
{
list_add(&new->task_list, &head->task_list);
}
@@ -116,71 +159,125 @@ static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
/*
* Used for wake-one threads:
*/
-static inline void
-__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+static inline void __add_cwait_exclusive(struct cwait_head *q,
+ struct cwait *wait)
{
wait->flags |= WQ_FLAG_EXCLUSIVE;
- __add_wait_queue(q, wait);
+ __add_cwait(q, wait);
}

-static inline void __add_wait_queue_tail(wait_queue_head_t *head,
- wait_queue_t *new)
+static inline void __add_cwait_tail(struct cwait_head *head,
+ struct cwait *new)
{
list_add_tail(&new->task_list, &head->task_list);
}

-static inline void
-__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+static inline void __add_cwait_tail_exclusive(struct cwait_head *q,
+ struct cwait *wait)
{
wait->flags |= WQ_FLAG_EXCLUSIVE;
- __add_wait_queue_tail(q, wait);
+ __add_cwait_tail(q, wait);
}

static inline void
-__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
+__remove_cwait(struct cwait_head *head, struct cwait *old)
{
list_del(&old->task_list);
}

-typedef int wait_bit_action_f(struct wait_bit_key *);
-void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
-void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
-void __wake_up_bit(wait_queue_head_t *, void *, int);
-int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
-int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
-void wake_up_bit(void *, int);
-void wake_up_atomic_t(atomic_t *);
-int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned);
-int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned);
-int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned);
-wait_queue_head_t *bit_waitqueue(void *, int);
-
-#define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL)
-#define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL)
-#define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL)
-#define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL, 1)
-#define wake_up_all_locked(x) __wake_up_locked((x), TASK_NORMAL, 0)
-
-#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
-#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
-#define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
-#define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE, 1)
+#ifdef CWAIT_COMPAT
+#define __add_wait_queue __add_cwait
+#define __remove_wait_queue __remove_cwait
+#define __add_wait_queue_tail __add_cwait_tail
+#define __add_wait_queue_exclusive __add_cwait_exclusive
+#define __add_wait_queue_tail_exclusive __add_cwait_tail_exclusive
+#endif
+
+typedef int cwait_bit_action_f(struct wait_bit_key *);
+void __cwake_up(struct cwait_head *q, unsigned int mode, int nr, void *key);
+void __cwake_up_locked_key(struct cwait_head *q, unsigned int mode, void *key);
+void __cwake_up_sync_key(struct cwait_head *q, unsigned int mode, int nr, void *key);
+void __cwake_up_locked(struct cwait_head *q, unsigned int mode, int nr);
+void __cwake_up_sync(struct cwait_head *q, unsigned int mode, int nr);
+void __cwake_up_bit(struct cwait_head *, void *, int);
+int __cwait_on_bit(struct cwait_head *, struct cwait_bit *, cwait_bit_action_f *, unsigned);
+int __cwait_on_bit_lock(struct cwait_head *, struct cwait_bit *, cwait_bit_action_f *, unsigned);
+
+#ifdef CWAIT_COMPAT
+#define wait_bit_action_f cwait_bit_action_f
+#define __wake_up __cwake_up
+#define __wake_up_locked_key __cwake_up_locked_key
+#define __wake_up_sync_key __cwake_up_sync_key
+#define __wake_up_locked __cwake_up_locked
+#define __wake_up_sync __cwake_up_sync
+#define __wake_up_bit __cwake_up_bit
+#define __wait_on_bit __cwait_on_bit
+#define __wait_on_bit_lock __cwait_on_bit_lock
+#endif
+
+void cwake_up_bit(void *, int);
+void cwake_up_atomic_t(atomic_t *);
+int out_of_line_cwait_on_bit(void *, int, cwait_bit_action_f *, unsigned);
+int out_of_line_cwait_on_bit_lock(void *, int, cwait_bit_action_f *, unsigned);
+int out_of_line_cwait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned);
+struct cwait_head *bit_cwaitqueue(void *, int);
+
+#define cwake_up(x) __cwake_up(x, TASK_NORMAL, 1, NULL)
+#define cwake_up_nr(x, nr) __cwake_up(x, TASK_NORMAL, nr, NULL)
+#define cwake_up_all(x) __cwake_up(x, TASK_NORMAL, 0, NULL)
+#define cwake_up_locked(x) __cwake_up_locked((x), TASK_NORMAL, 1)
+#define cwake_up_all_locked(x) __cwake_up_locked((x), TASK_NORMAL, 0)
+
+#ifdef CWAIT_COMPAT
+#define wake_up cwake_up
+#define wake_up_nr cwake_up_nr
+#define wake_up_all cwake_up_all
+#define wake_up_bit cwake_up_bit
+#define wake_up_atomic_t cwake_up_atomic_t
+#define out_of_line_wait_on_bit out_of_line_cwait_on_bit
+#define out_of_line_wait_on_bit_lock out_of_line_cwait_on_bit_lock
+#define out_of_line_wait_on_atomic_t out_of_line_cwait_on_atomic_t
+#define bit_waitqueue bit_cwaitqueue
+#define wake_up_locked cwake_up_locked
+#define wake_up_all_locked cwake_up_all_locked
+#endif
+
+#define cwake_up_interruptible(x) \
+ __cwake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
+#define cwake_up_interruptible_nr(x, nr) \
+ __cwake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
+#define cwake_up_interruptible_all(x) \
+ __cwake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
+#define cwake_up_interruptible_sync(x) \
+ __cwake_up_sync((x), TASK_INTERRUPTIBLE, 1)
+
+#ifdef CWAIT_COMPAT
+#define wake_up_interruptible cwake_up_interruptible
+#define wake_up_interruptible_nr cwake_up_interruptible_nr
+#define wake_up_interruptible_all cwake_up_interruptible_all
+#define wake_up_interruptible_sync cwake_up_interruptible_sync
+#endif

/*
* Wakeup macros to be used to report events to the targets.
*/
-#define wake_up_poll(x, m) \
- __wake_up(x, TASK_NORMAL, 1, (void *) (m))
-#define wake_up_locked_poll(x, m) \
- __wake_up_locked_key((x), TASK_NORMAL, (void *) (m))
-#define wake_up_interruptible_poll(x, m) \
- __wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m))
-#define wake_up_interruptible_sync_poll(x, m) \
- __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
+#define cwake_up_poll(x, m) \
+ __cwake_up(x, TASK_NORMAL, 1, (void *) (m))
+#define cwake_up_locked_poll(x, m) \
+ __cwake_up_locked_key((x), TASK_NORMAL, (void *) (m))
+#define cwake_up_interruptible_poll(x, m) \
+ __cwake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m))
+#define cwake_up_interruptible_sync_poll(x, m) \
+ __cwake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
+
+#ifdef CWAIT_COMPAT
+#define wake_up_poll cwake_up_poll
+#define wake_up_locked_poll cwake_up_locked_poll
+#define wake_up_interruptible_poll cwake_up_interruptible_poll
+#define wake_up_interruptible_sync_poll cwake_up_interruptible_sync_poll
+#endif

+/* valid for both simple and complex wait queues */
#define ___wait_cond_timeout(condition) \
({ \
bool __cond = (condition); \
@@ -189,26 +286,27 @@ wait_queue_head_t *bit_waitqueue(void *, int);
__cond || !__ret; \
})

+/* valid for both simple and complex wait queues */
#define ___wait_is_interruptible(state) \
(!__builtin_constant_p(state) || \
state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \

/*
- * The below macro ___wait_event() has an explicit shadow of the __ret
+ * The below macro ___cwait_event() has an explicit shadow of the __ret
* variable when used from the wait_event_*() macros.
*
- * This is so that both can use the ___wait_cond_timeout() construct
+ * This is so that both can use the ___cwait_cond_timeout() construct
* to wrap the condition.
*
- * The type inconsistency of the wait_event_*() __ret variable is also
+ * The type inconsistency of the cwait_event_*() __ret variable is also
* on purpose; we use long where we can return timeout values and int
* otherwise.
*/

-#define ___wait_event(wq, condition, state, exclusive, ret, cmd) \
+#define ___cwait_event(wq, condition, state, exclusive, ret, cmd) \
({ \
__label__ __out; \
- wait_queue_t __wait; \
+ struct cwait __wait; \
long __ret = ret; /* explicit shadow */ \
\
INIT_LIST_HEAD(&__wait.task_list); \
@@ -218,7 +316,7 @@ wait_queue_head_t *bit_waitqueue(void *, int);
__wait.flags = 0; \
\
for (;;) { \
- long __int = prepare_to_wait_event(&wq, &__wait, state);\
+ long __int = prepare_to_cwait_event(&wq, &__wait, state);\
\
if (condition) \
break; \
@@ -226,8 +324,8 @@ wait_queue_head_t *bit_waitqueue(void *, int);
if (___wait_is_interruptible(state) && __int) { \
__ret = __int; \
if (exclusive) { \
- abort_exclusive_wait(&wq, &__wait, \
- state, NULL); \
+ abort_exclusive_cwait(&wq, &__wait, \
+ state, NULL); \
goto __out; \
} \
break; \
@@ -235,41 +333,41 @@ wait_queue_head_t *bit_waitqueue(void *, int);
\
cmd; \
} \
- finish_wait(&wq, &__wait); \
+ finish_cwait(&wq, &__wait); \
__out: __ret; \
})

-#define __wait_event(wq, condition) \
- (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
+#define __cwait_event(wq, condition) \
+ (void)___cwait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
schedule())

/**
- * wait_event - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * cwait_event - sleep until a condition gets true
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
*
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
* @condition evaluates to true. The @condition is checked each time
* the waitqueue @wq is woken up.
*
- * wake_up() has to be called after changing any variable that could
+ * cwake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*/
-#define wait_event(wq, condition) \
+#define cwait_event(wq, condition) \
do { \
if (condition) \
break; \
- __wait_event(wq, condition); \
+ __cwait_event(wq, condition); \
} while (0)

-#define __wait_event_timeout(wq, condition, timeout) \
- ___wait_event(wq, ___wait_cond_timeout(condition), \
+#define __cwait_event_timeout(wq, condition, timeout) \
+ ___cwait_event(wq, ___wait_cond_timeout(condition), \
TASK_UNINTERRUPTIBLE, 0, timeout, \
__ret = schedule_timeout(__ret))

/**
- * wait_event_timeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * cwait_event_timeout - sleep until a condition gets true or a timeout elapses
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
* @timeout: timeout, in jiffies
*
@@ -277,28 +375,28 @@ do { \
* @condition evaluates to true. The @condition is checked each time
* the waitqueue @wq is woken up.
*
- * wake_up() has to be called after changing any variable that could
+ * cwake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* The function returns 0 if the @timeout elapsed, or the remaining
* jiffies (at least 1) if the @condition evaluated to %true before
* the @timeout elapsed.
*/
-#define wait_event_timeout(wq, condition, timeout) \
+#define cwait_event_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
if (!___wait_cond_timeout(condition)) \
- __ret = __wait_event_timeout(wq, condition, timeout); \
+ __ret = __cwait_event_timeout(wq, condition, timeout); \
__ret; \
})

-#define __wait_event_cmd(wq, condition, cmd1, cmd2) \
- (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
+#define __cwait_event_cmd(wq, condition, cmd1, cmd2) \
+ (void)___cwait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
cmd1; schedule(); cmd2)

/**
- * wait_event_cmd - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * cwait_event_cmd - sleep until a condition gets true
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
* @cmd1: the command will be executed before sleep
* @cmd2: the command will be executed after sleep
@@ -310,20 +408,20 @@ do { \
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*/
-#define wait_event_cmd(wq, condition, cmd1, cmd2) \
+#define cwait_event_cmd(wq, condition, cmd1, cmd2) \
do { \
if (condition) \
break; \
- __wait_event_cmd(wq, condition, cmd1, cmd2); \
+ __cwait_event_cmd(wq, condition, cmd1, cmd2); \
} while (0)

-#define __wait_event_interruptible(wq, condition) \
- ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
+#define __cwait_event_interruptible(wq, condition) \
+ ___cwait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
schedule())

/**
- * wait_event_interruptible - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * cwait_event_interruptible - sleep until a condition gets true
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
*
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
@@ -336,22 +434,23 @@ do { \
* The function will return -ERESTARTSYS if it was interrupted by a
* signal and 0 if @condition evaluated to true.
*/
-#define wait_event_interruptible(wq, condition) \
+#define cwait_event_interruptible(wq, condition) \
({ \
int __ret = 0; \
if (!(condition)) \
- __ret = __wait_event_interruptible(wq, condition); \
+ __ret = __cwait_event_interruptible(wq, condition); \
__ret; \
})

-#define __wait_event_interruptible_timeout(wq, condition, timeout) \
- ___wait_event(wq, ___wait_cond_timeout(condition), \
+#define __cwait_event_interruptible_timeout(wq, condition, timeout) \
+ ___cwait_event(wq, ___wait_cond_timeout(condition), \
TASK_INTERRUPTIBLE, 0, timeout, \
__ret = schedule_timeout(__ret))

/**
- * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * cwait_event_interruptible_timeout - sleep until a condition gets true or a
+ * timeout elapses
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
* @timeout: timeout, in jiffies
*
@@ -359,7 +458,7 @@ do { \
* @condition evaluates to true or a signal is received.
* The @condition is checked each time the waitqueue @wq is woken up.
*
- * wake_up() has to be called after changing any variable that could
+ * cwake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* Returns:
@@ -367,16 +466,16 @@ do { \
* a signal, or the remaining jiffies (at least 1) if the @condition
* evaluated to %true before the @timeout elapsed.
*/
-#define wait_event_interruptible_timeout(wq, condition, timeout) \
+#define cwait_event_interruptible_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
if (!___wait_cond_timeout(condition)) \
- __ret = __wait_event_interruptible_timeout(wq, \
+ __ret = __cwait_event_interruptible_timeout(wq, \
condition, timeout); \
__ret; \
})

-#define __wait_event_hrtimeout(wq, condition, timeout, state) \
+#define __cwait_event_hrtimeout(wq, condition, timeout, state) \
({ \
int __ret = 0; \
struct hrtimer_sleeper __t; \
@@ -389,7 +488,7 @@ do { \
current->timer_slack_ns, \
HRTIMER_MODE_REL); \
\
- __ret = ___wait_event(wq, condition, state, 0, 0, \
+ __ret = ___cwait_event(wq, condition, state, 0, 0, \
if (!__t.task) { \
__ret = -ETIME; \
break; \
@@ -402,8 +501,9 @@ do { \
})

/**
- * wait_event_hrtimeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * cwait_event_hrtimeout - sleep until a condition gets true or a
+ * timeout elapses
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
* @timeout: timeout, as a ktime_t
*
@@ -411,24 +511,25 @@ do { \
* @condition evaluates to true or a signal is received.
* The @condition is checked each time the waitqueue @wq is woken up.
*
- * wake_up() has to be called after changing any variable that could
+ * cwake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* The function returns 0 if @condition became true, or -ETIME if the timeout
* elapsed.
*/
-#define wait_event_hrtimeout(wq, condition, timeout) \
+#define cwait_event_hrtimeout(wq, condition, timeout) \
({ \
int __ret = 0; \
if (!(condition)) \
- __ret = __wait_event_hrtimeout(wq, condition, timeout, \
+ __ret = __cwait_event_hrtimeout(wq, condition, timeout, \
TASK_UNINTERRUPTIBLE); \
__ret; \
})

/**
- * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses
- * @wq: the waitqueue to wait on
+ * cwait_event_interruptible_hrtimeout - sleep until a condition gets true or
+ * a timeout elapses
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
* @timeout: timeout, as a ktime_t
*
@@ -442,37 +543,37 @@ do { \
* The function returns 0 if @condition became true, -ERESTARTSYS if it was
* interrupted by a signal, or -ETIME if the timeout elapsed.
*/
-#define wait_event_interruptible_hrtimeout(wq, condition, timeout) \
+#define cwait_event_interruptible_hrtimeout(wq, condition, timeout) \
({ \
long __ret = 0; \
if (!(condition)) \
- __ret = __wait_event_hrtimeout(wq, condition, timeout, \
+ __ret = __cwait_event_hrtimeout(wq, condition, timeout, \
TASK_INTERRUPTIBLE); \
__ret; \
})

-#define __wait_event_interruptible_exclusive(wq, condition) \
- ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \
+#define __cwait_event_interruptible_exclusive(wq, condition) \
+ ___cwait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \
schedule())

-#define wait_event_interruptible_exclusive(wq, condition) \
+#define cwait_event_interruptible_exclusive(wq, condition) \
({ \
int __ret = 0; \
if (!(condition)) \
- __ret = __wait_event_interruptible_exclusive(wq, condition);\
+ __ret = __cwait_event_interruptible_exclusive(wq, condition);\
__ret; \
})


-#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \
+#define __cwait_event_interruptible_locked(wq, condition, exclusive, irq)\
({ \
int __ret = 0; \
- DEFINE_WAIT(__wait); \
+ DEFINE_CWAIT(__wait); \
if (exclusive) \
__wait.flags |= WQ_FLAG_EXCLUSIVE; \
do { \
if (likely(list_empty(&__wait.task_list))) \
- __add_wait_queue_tail(&(wq), &__wait); \
+ __add_cwait_tail(&(wq), &__wait); \
set_current_state(TASK_INTERRUPTIBLE); \
if (signal_pending(current)) { \
__ret = -ERESTARTSYS; \
@@ -488,15 +589,15 @@ do { \
else \
spin_lock(&(wq).lock); \
} while (!(condition)); \
- __remove_wait_queue(&(wq), &__wait); \
+ __remove_cwait(&(wq), &__wait); \
__set_current_state(TASK_RUNNING); \
__ret; \
})


/**
- * wait_event_interruptible_locked - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * cwait_event_interruptible_locked - sleep until a condition gets true
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
*
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
@@ -517,13 +618,13 @@ do { \
* The function will return -ERESTARTSYS if it was interrupted by a
* signal and 0 if @condition evaluated to true.
*/
-#define wait_event_interruptible_locked(wq, condition) \
+#define cwait_event_interruptible_locked(wq, condition) \
((condition) \
- ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0))
+ ? 0 : __cwait_event_interruptible_locked(wq, condition, 0, 0))

/**
- * wait_event_interruptible_locked_irq - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * cwait_event_interruptible_locked_irq - sleep until a condition gets true
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
*
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
@@ -544,13 +645,14 @@ do { \
* The function will return -ERESTARTSYS if it was interrupted by a
* signal and 0 if @condition evaluated to true.
*/
-#define wait_event_interruptible_locked_irq(wq, condition) \
+#define cwait_event_interruptible_locked_irq(wq, condition) \
((condition) \
- ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1))
+ ? 0 : __cwait_event_interruptible_locked(wq, condition, 0, 1))

/**
- * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true
- * @wq: the waitqueue to wait on
+ * cwait_event_interruptible_exclusive_locked - sleep exclusively until a
+ * condition gets true
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
*
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
@@ -569,19 +671,20 @@ do { \
* set thus when other process waits process on the list if this
* process is awaken further processes are not considered.
*
- * wake_up_locked() has to be called after changing any variable that could
+ * cwake_up_locked() has to be called after changing any variable that could
* change the result of the wait condition.
*
* The function will return -ERESTARTSYS if it was interrupted by a
* signal and 0 if @condition evaluated to true.
*/
-#define wait_event_interruptible_exclusive_locked(wq, condition) \
+#define cwait_event_interruptible_exclusive_locked(wq, condition) \
((condition) \
- ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0))
+ ? 0 : __cwait_event_interruptible_locked(wq, condition, 1, 0))

/**
- * wait_event_interruptible_exclusive_locked_irq - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * cwait_event_interruptible_exclusive_locked_irq - sleep until a condition
+ * gets true
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
*
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
@@ -606,51 +709,51 @@ do { \
* The function will return -ERESTARTSYS if it was interrupted by a
* signal and 0 if @condition evaluated to true.
*/
-#define wait_event_interruptible_exclusive_locked_irq(wq, condition) \
+#define cwait_event_interruptible_exclusive_locked_irq(wq, condition) \
((condition) \
- ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1))
+ ? 0 : __cwait_event_interruptible_locked(wq, condition, 1, 1))


-#define __wait_event_killable(wq, condition) \
- ___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule())
+#define __cwait_event_killable(wq, condition) \
+ ___cwait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule())

/**
- * wait_event_killable - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * cwait_event_killable - sleep until a condition gets true
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
*
* The process is put to sleep (TASK_KILLABLE) until the
* @condition evaluates to true or a signal is received.
* The @condition is checked each time the waitqueue @wq is woken up.
*
- * wake_up() has to be called after changing any variable that could
+ * cwake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* The function will return -ERESTARTSYS if it was interrupted by a
* signal and 0 if @condition evaluated to true.
*/
-#define wait_event_killable(wq, condition) \
+#define cwait_event_killable(wq, condition) \
({ \
int __ret = 0; \
if (!(condition)) \
- __ret = __wait_event_killable(wq, condition); \
+ __ret = __cwait_event_killable(wq, condition); \
__ret; \
})


-#define __wait_event_lock_irq(wq, condition, lock, cmd) \
- (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
+#define __cwait_event_lock_irq(wq, condition, lock, cmd) \
+ (void)___cwait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
spin_unlock_irq(&lock); \
cmd; \
schedule(); \
spin_lock_irq(&lock))

/**
- * wait_event_lock_irq_cmd - sleep until a condition gets true. The
- * condition is checked under the lock. This
- * is expected to be called with the lock
- * taken.
- * @wq: the waitqueue to wait on
+ * cwait_event_lock_irq_cmd - sleep until a condition gets true. The
+ * condition is checked under the lock. This
+ * is expected to be called with the lock
+ * taken.
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
* @lock: a locked spinlock_t, which will be released before cmd
* and schedule() and reacquired afterwards.
@@ -661,26 +764,26 @@ do { \
* @condition evaluates to true. The @condition is checked each time
* the waitqueue @wq is woken up.
*
- * wake_up() has to be called after changing any variable that could
+ * cwake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* This is supposed to be called while holding the lock. The lock is
* dropped before invoking the cmd and going to sleep and is reacquired
* afterwards.
*/
-#define wait_event_lock_irq_cmd(wq, condition, lock, cmd) \
+#define cwait_event_lock_irq_cmd(wq, condition, lock, cmd) \
do { \
if (condition) \
break; \
- __wait_event_lock_irq(wq, condition, lock, cmd); \
+ __cwait_event_lock_irq(wq, condition, lock, cmd); \
} while (0)

/**
- * wait_event_lock_irq - sleep until a condition gets true. The
- * condition is checked under the lock. This
- * is expected to be called with the lock
- * taken.
- * @wq: the waitqueue to wait on
+ * cwait_event_lock_irq - sleep until a condition gets true. The
+ * condition is checked under the lock. This
+ * is expected to be called with the lock
+ * taken.
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
* @lock: a locked spinlock_t, which will be released before schedule()
* and reacquired afterwards.
@@ -695,26 +798,26 @@ do { \
* This is supposed to be called while holding the lock. The lock is
* dropped before going to sleep and is reacquired afterwards.
*/
-#define wait_event_lock_irq(wq, condition, lock) \
+#define cwait_event_lock_irq(wq, condition, lock) \
do { \
if (condition) \
break; \
- __wait_event_lock_irq(wq, condition, lock, ); \
+ __cwait_event_lock_irq(wq, condition, lock, ); \
} while (0)


-#define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd) \
- ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
+#define __cwait_event_interruptible_lock_irq(wq, condition, lock, cmd) \
+ ___cwait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
spin_unlock_irq(&lock); \
cmd; \
schedule(); \
spin_lock_irq(&lock))

/**
- * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
+ * cwait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
* The condition is checked under the lock. This is expected to
* be called with the lock taken.
- * @wq: the waitqueue to wait on
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
* @lock: a locked spinlock_t, which will be released before cmd and
* schedule() and reacquired afterwards.
@@ -725,7 +828,7 @@ do { \
* @condition evaluates to true or a signal is received. The @condition is
* checked each time the waitqueue @wq is woken up.
*
- * wake_up() has to be called after changing any variable that could
+ * cwake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* This is supposed to be called while holding the lock. The lock is
@@ -735,20 +838,20 @@ do { \
* The macro will return -ERESTARTSYS if it was interrupted by a signal
* and 0 if @condition evaluated to true.
*/
-#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd) \
+#define cwait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd)\
({ \
int __ret = 0; \
if (!(condition)) \
- __ret = __wait_event_interruptible_lock_irq(wq, \
+ __ret = __cwait_event_interruptible_lock_irq(wq, \
condition, lock, cmd); \
__ret; \
})

/**
- * wait_event_interruptible_lock_irq - sleep until a condition gets true.
+ * cwait_event_interruptible_lock_irq - sleep until a condition gets true.
* The condition is checked under the lock. This is expected
* to be called with the lock taken.
- * @wq: the waitqueue to wait on
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
* @lock: a locked spinlock_t, which will be released before schedule()
* and reacquired afterwards.
@@ -757,7 +860,7 @@ do { \
* @condition evaluates to true or signal is received. The @condition is
* checked each time the waitqueue @wq is woken up.
*
- * wake_up() has to be called after changing any variable that could
+ * cwake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* This is supposed to be called while holding the lock. The lock is
@@ -766,28 +869,28 @@ do { \
* The macro will return -ERESTARTSYS if it was interrupted by a signal
* and 0 if @condition evaluated to true.
*/
-#define wait_event_interruptible_lock_irq(wq, condition, lock) \
+#define cwait_event_interruptible_lock_irq(wq, condition, lock) \
({ \
int __ret = 0; \
if (!(condition)) \
- __ret = __wait_event_interruptible_lock_irq(wq, \
+ __ret = __cwait_event_interruptible_lock_irq(wq, \
condition, lock,); \
__ret; \
})

-#define __wait_event_interruptible_lock_irq_timeout(wq, condition, \
+#define __cwait_event_interruptible_lock_irq_timeout(wq, condition, \
lock, timeout) \
- ___wait_event(wq, ___wait_cond_timeout(condition), \
+ ___cwait_event(wq, ___wait_cond_timeout(condition), \
TASK_INTERRUPTIBLE, 0, timeout, \
spin_unlock_irq(&lock); \
__ret = schedule_timeout(__ret); \
spin_lock_irq(&lock));

/**
- * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets
+ * cwait_event_interruptible_lock_irq_timeout - sleep until a condition gets
* true or a timeout elapses. The condition is checked under
* the lock. This is expected to be called with the lock taken.
- * @wq: the waitqueue to wait on
+ * @wq: the complex waitqueue to wait on
* @condition: a C expression for the event to wait for
* @lock: a locked spinlock_t, which will be released before schedule()
* and reacquired afterwards.
@@ -797,7 +900,7 @@ do { \
* @condition evaluates to true or signal is received. The @condition is
* checked each time the waitqueue @wq is woken up.
*
- * wake_up() has to be called after changing any variable that could
+ * cwake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* This is supposed to be called while holding the lock. The lock is
@@ -807,61 +910,110 @@ do { \
* was interrupted by a signal, and the remaining jiffies otherwise
* if the condition evaluated to true before the timeout elapsed.
*/
-#define wait_event_interruptible_lock_irq_timeout(wq, condition, lock, \
+#define cwait_event_interruptible_lock_irq_timeout(wq, condition, lock, \
timeout) \
({ \
long __ret = timeout; \
if (!___wait_cond_timeout(condition)) \
- __ret = __wait_event_interruptible_lock_irq_timeout( \
+ __ret = __cwait_event_interruptible_lock_irq_timeout( \
wq, condition, lock, timeout); \
__ret; \
})

+#ifdef CWAIT_COMPAT
+#define wait_event cwait_event
+#define __wait_event __cwait_event
+#define ___wait_event ___cwait_event
+#define wait_event_cmd cwait_event_cmd
+#define wait_event_timeout cwait_event_timeout
+#define wait_event_killable cwait_event_killable
+#define wait_event_lock_irq cwait_event_lock_irq
+#define wait_event_lock_irq_cmd cwait_event_lock_irq_cmd
+#define wait_event_interruptible cwait_event_interruptible
+#define __wait_event_interruptible __cwait_event_interruptible
+#define wait_event_interruptible_timeout \
+ cwait_event_interruptible_timeout
+#define wait_event_interruptible_hrtimeout \
+ cwait_event_interruptible_hrtimeout
+#define wait_event_interruptible_exclusive \
+ cwait_event_interruptible_exclusive
+#define wait_event_interruptible_locked \
+ cwait_event_interruptible_locked
+#define wait_event_interruptible_lock_irq \
+ cwait_event_interruptible_lock_irq
+#define wait_event_interruptible_locked_irq \
+ cwait_event_interruptible_locked_irq
+#define wait_event_interruptible_lock_irq_cmd \
+ cwait_event_interruptible_lock_irq_cmd
+#define wait_event_interruptible_lock_irq_timeout \
+ cwait_event_interruptible_lock_irq_timeout
+#define wait_event_interruptible_exclusive_locked \
+ cwait_event_interruptible_exclusive_locked
+#define wait_event_interruptible_exclusive_locked_irq \
+ cwait_event_interruptible_exclusive_locked_irq
+#endif
+
/*
* Waitqueues which are removed from the waitqueue_head at wakeup time
*/
-void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
-void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
-long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
-void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
-void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
-int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
-int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
-
-#define DEFINE_WAIT_FUNC(name, function) \
- wait_queue_t name = { \
+void prepare_to_cwait(struct cwait_head *q, struct cwait *wait, int state);
+void prepare_to_cwait_exclusive(struct cwait_head *q, struct cwait *wait, int state);
+long prepare_to_cwait_event(struct cwait_head *q, struct cwait *wait, int state);
+void finish_cwait(struct cwait_head *q, struct cwait *wait);
+void abort_exclusive_cwait(struct cwait_head *q, struct cwait *wait, unsigned int mode, void *key);
+int autoremove_cwake_function(struct cwait *wait, unsigned mode, int sync, void *key);
+int cwake_bit_function(struct cwait *wait, unsigned mode, int sync, void *key);
+
+#ifdef CWAIT_COMPAT
+#define prepare_to_wait prepare_to_cwait
+#define prepare_to_wait_exclusive prepare_to_cwait_exclusive
+#define prepare_to_wait_event prepare_to_cwait_event
+#define finish_wait finish_cwait
+#define abort_exclusive_wait abort_exclusive_cwait
+#define autoremove_wake_function autoremove_cwake_function
+#define wake_bit_function cwake_bit_function
+#endif
+
+#define DEFINE_CWAIT_FUNC(name, function) \
+ struct cwait name = { \
.private = current, \
.func = function, \
.task_list = LIST_HEAD_INIT((name).task_list), \
}

-#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
+#define DEFINE_CWAIT(name) DEFINE_CWAIT_FUNC(name, autoremove_wake_function)

-#define DEFINE_WAIT_BIT(name, word, bit) \
- struct wait_bit_queue name = { \
- .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \
+#define DEFINE_CWAIT_BIT(name, word, bit) \
+ struct cwait_bit name = { \
+ .key = CWAIT_BIT_KEY_INITIALIZER(word, bit), \
.wait = { \
.private = current, \
- .func = wake_bit_function, \
+ .func = cwake_bit_function, \
.task_list = \
LIST_HEAD_INIT((name).wait.task_list), \
}, \
}

-#define init_wait(wait) \
+#define init_cwait(wait) \
do { \
(wait)->private = current; \
- (wait)->func = autoremove_wake_function; \
+ (wait)->func = autoremove_cwake_function; \
INIT_LIST_HEAD(&(wait)->task_list); \
(wait)->flags = 0; \
} while (0)

+#ifdef CWAIT_COMPAT
+#define DEFINE_WAIT_FUNC DEFINE_CWAIT_FUNC
+#define DEFINE_WAIT DEFINE_CWAIT
+#define DEFINE_WAIT_BIT DEFINE_CWAIT_BIT
+#define init_wait init_cwait
+#endif

extern int bit_wait(struct wait_bit_key *);
extern int bit_wait_io(struct wait_bit_key *);

/**
- * wait_on_bit - wait for a bit to be cleared
+ * cwait_on_bit - wait for a bit to be cleared
* @word: the word being waited on, a kernel virtual address
* @bit: the bit of the word being waited on
* @mode: the task state to sleep in
@@ -869,7 +1021,7 @@ extern int bit_wait_io(struct wait_bit_key *);
* There is a standard hashed waitqueue table for generic use. This
* is the part of the hashtable's accessor API that waits on a bit.
* For instance, if one were to have waiters on a bitflag, one would
- * call wait_on_bit() in threads waiting for the bit to clear.
+ * call cwait_on_bit() in threads waiting for the bit to clear.
* One uses wait_on_bit() where one is waiting for the bit to clear,
* but has no intention of setting it.
* Returned value will be zero if the bit was cleared, or non-zero
@@ -877,23 +1029,23 @@ extern int bit_wait_io(struct wait_bit_key *);
* on that signal.
*/
static inline int
-wait_on_bit(void *word, int bit, unsigned mode)
+cwait_on_bit(void *word, int bit, unsigned mode)
{
if (!test_bit(bit, word))
return 0;
- return out_of_line_wait_on_bit(word, bit,
- bit_wait,
- mode);
+ return out_of_line_cwait_on_bit(word, bit,
+ bit_wait,
+ mode);
}

/**
- * wait_on_bit_io - wait for a bit to be cleared
+ * cwait_on_bit_io - wait for a bit to be cleared
* @word: the word being waited on, a kernel virtual address
* @bit: the bit of the word being waited on
* @mode: the task state to sleep in
*
* Use the standard hashed waitqueue table to wait for a bit
- * to be cleared. This is similar to wait_on_bit(), but calls
+ * to be cleared. This is similar to cwait_on_bit(), but calls
* io_schedule() instead of schedule() for the actual waiting.
*
* Returned value will be zero if the bit was cleared, or non-zero
@@ -901,17 +1053,17 @@ wait_on_bit(void *word, int bit, unsigned mode)
* on that signal.
*/
static inline int
-wait_on_bit_io(void *word, int bit, unsigned mode)
+cwait_on_bit_io(void *word, int bit, unsigned mode)
{
if (!test_bit(bit, word))
return 0;
- return out_of_line_wait_on_bit(word, bit,
- bit_wait_io,
- mode);
+ return out_of_line_cwait_on_bit(word, bit,
+ bit_wait_io,
+ mode);
}

/**
- * wait_on_bit_action - wait for a bit to be cleared
+ * cwait_on_bit_action - wait for a bit to be cleared
* @word: the word being waited on, a kernel virtual address
* @bit: the bit of the word being waited on
* @action: the function used to sleep, which may take special actions
@@ -919,7 +1071,7 @@ wait_on_bit_io(void *word, int bit, unsigned mode)
*
* Use the standard hashed waitqueue table to wait for a bit
* to be cleared, and allow the waiting action to be specified.
- * This is like wait_on_bit() but allows fine control of how the waiting
+ * This is like cwait_on_bit() but allows fine control of how the waiting
* is done.
*
* Returned value will be zero if the bit was cleared, or non-zero
@@ -927,15 +1079,15 @@ wait_on_bit_io(void *word, int bit, unsigned mode)
* on that signal.
*/
static inline int
-wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
+cwait_on_bit_action(void *word, int bit, cwait_bit_action_f *action, unsigned mode)
{
if (!test_bit(bit, word))
return 0;
- return out_of_line_wait_on_bit(word, bit, action, mode);
+ return out_of_line_cwait_on_bit(word, bit, action, mode);
}

/**
- * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
+ * cwait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
* @word: the word being waited on, a kernel virtual address
* @bit: the bit of the word being waited on
* @mode: the task state to sleep in
@@ -945,7 +1097,7 @@ wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode
* when one intends to set it, for instance, trying to lock bitflags.
* For instance, if one were to have waiters trying to set bitflag
* and waiting for it to clear before setting it, one would call
- * wait_on_bit() in threads waiting to be able to set the bit.
+ * cwait_on_bit() in threads waiting to be able to set the bit.
* One uses wait_on_bit_lock() where one is waiting for the bit to
* clear with the intention of setting it, and when done, clearing it.
*
@@ -954,22 +1106,22 @@ wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode
* the @mode allows that signal to wake the process.
*/
static inline int
-wait_on_bit_lock(void *word, int bit, unsigned mode)
+cwait_on_bit_lock(void *word, int bit, unsigned mode)
{
if (!test_and_set_bit(bit, word))
return 0;
- return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode);
+ return out_of_line_cwait_on_bit_lock(word, bit, bit_wait, mode);
}

/**
- * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it
+ * cwait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it
* @word: the word being waited on, a kernel virtual address
* @bit: the bit of the word being waited on
* @mode: the task state to sleep in
*
* Use the standard hashed waitqueue table to wait for a bit
* to be cleared and then to atomically set it. This is similar
- * to wait_on_bit(), but calls io_schedule() instead of schedule()
+ * to cwait_on_bit(), but calls io_schedule() instead of schedule()
* for the actual waiting.
*
* Returns zero if the bit was (eventually) found to be clear and was
@@ -977,15 +1129,15 @@ wait_on_bit_lock(void *word, int bit, unsigned mode)
* the @mode allows that signal to wake the process.
*/
static inline int
-wait_on_bit_lock_io(void *word, int bit, unsigned mode)
+cwait_on_bit_lock_io(void *word, int bit, unsigned mode)
{
if (!test_and_set_bit(bit, word))
return 0;
- return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode);
+ return out_of_line_cwait_on_bit_lock(word, bit, bit_wait_io, mode);
}

/**
- * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it
+ * cwait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it
* @word: the word being waited on, a kernel virtual address
* @bit: the bit of the word being waited on
* @action: the function used to sleep, which may take special actions
@@ -994,7 +1146,7 @@ wait_on_bit_lock_io(void *word, int bit, unsigned mode)
* Use the standard hashed waitqueue table to wait for a bit
* to be cleared and then to set it, and allow the waiting action
* to be specified.
- * This is like wait_on_bit() but allows fine control of how the waiting
+ * This is like cwait_on_bit() but allows fine control of how the waiting
* is done.
*
* Returns zero if the bit was (eventually) found to be clear and was
@@ -1002,15 +1154,15 @@ wait_on_bit_lock_io(void *word, int bit, unsigned mode)
* the @mode allows that signal to wake the process.
*/
static inline int
-wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
+cwait_on_bit_lock_action(void *word, int bit, cwait_bit_action_f *action, unsigned mode)
{
if (!test_and_set_bit(bit, word))
return 0;
- return out_of_line_wait_on_bit_lock(word, bit, action, mode);
+ return out_of_line_cwait_on_bit_lock(word, bit, action, mode);
}

/**
- * wait_on_atomic_t - Wait for an atomic_t to become 0
+ * cwait_on_atomic_t - Wait for an atomic_t to become 0
* @val: The atomic value being waited on, a kernel virtual address
* @action: the function used to sleep, which may take special actions
* @mode: the task state to sleep in
@@ -1020,11 +1172,21 @@ wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned
* outside of the target 'word'.
*/
static inline
-int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
+int cwait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
{
if (atomic_read(val) == 0)
return 0;
- return out_of_line_wait_on_atomic_t(val, action, mode);
+ return out_of_line_cwait_on_atomic_t(val, action, mode);
}

+#ifdef CWAIT_COMPAT
+#define wait_on_bit cwait_on_bit
+#define wait_on_bit_io cwait_on_bit_io
+#define wait_on_bit_lock cwait_on_bit_lock
+#define wait_on_bit_lock_io cwait_on_bit_lock_io
+#define wait_on_bit_action cwait_on_bit_action
+#define wait_on_bit_lock_action cwait_on_bit_lock_action
+#define wait_on_atomic_t cwait_on_atomic_t
+#endif
+
#endif /* _LINUX_WAIT_H */
--
1.9.2

Paul Gortmaker
2014-10-18 00:22:59 UTC
Completions have no long-lasting callbacks and therefore do not need
the complex waitqueue variant. Use simple waitqueues, which reduces
contention on the waitqueue lock.

This was a carry-forward from v3.10-rt, with some RT-specific chunks
dropped, and updated to align with the names that were chosen to match
the simple waitqueue support.

Originally-by: Thomas Gleixner <***@linutronix.de>
Signed-off-by: Paul Gortmaker <***@windriver.com>

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 5d5aaae3af43..3b2733de7664 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -24,11 +24,11 @@
*/
struct completion {
unsigned int done;
- wait_queue_head_t wait;
+ struct swait_head wait;
};

#define COMPLETION_INITIALIZER(work) \
- { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
+ { 0, SWAIT_HEAD_INITIALIZER((work).wait) }

#define COMPLETION_INITIALIZER_ONSTACK(work) \
({ init_completion(&work); work; })
@@ -73,7 +73,7 @@ struct completion {
static inline void init_completion(struct completion *x)
{
x->done = 0;
- init_waitqueue_head(&x->wait);
+ init_swait_head(&x->wait);
}

/**
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index a63f4dc27909..7a165c697956 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -30,10 +30,10 @@ void complete(struct completion *x)
{
unsigned long flags;

- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
x->done++;
- __wake_up_locked(&x->wait, TASK_NORMAL, 1);
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ __swake_up_locked(&x->wait, TASK_NORMAL, 1);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete);

@@ -50,10 +50,10 @@ void complete_all(struct completion *x)
{
unsigned long flags;

- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
x->done += UINT_MAX/2;
- __wake_up_locked(&x->wait, TASK_NORMAL, 0);
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ __swake_up_locked(&x->wait, TASK_NORMAL, 0);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete_all);

@@ -62,20 +62,20 @@ do_wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)
{
if (!x->done) {
- DECLARE_WAITQUEUE(wait, current);
+ DECLARE_SWAIT(wait);

- __add_wait_queue_tail_exclusive(&x->wait, &wait);
+ __prepare_to_swait(&x->wait, &wait);
do {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
__set_current_state(state);
- spin_unlock_irq(&x->wait.lock);
+ raw_spin_unlock_irq(&x->wait.lock);
timeout = action(timeout);
- spin_lock_irq(&x->wait.lock);
+ raw_spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout);
- __remove_wait_queue(&x->wait, &wait);
+ __finish_swait(&x->wait, &wait);
if (!x->done)
return timeout;
}
@@ -89,9 +89,9 @@ __wait_for_common(struct completion *x,
{
might_sleep();

- spin_lock_irq(&x->wait.lock);
+ raw_spin_lock_irq(&x->wait.lock);
timeout = do_wait_for_common(x, action, timeout, state);
- spin_unlock_irq(&x->wait.lock);
+ raw_spin_unlock_irq(&x->wait.lock);
return timeout;
}

@@ -267,12 +267,12 @@ bool try_wait_for_completion(struct completion *x)
unsigned long flags;
int ret = 1;

- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
else
x->done--;
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(try_wait_for_completion);
@@ -290,10 +290,10 @@ bool completion_done(struct completion *x)
unsigned long flags;
int ret = 1;

- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = 0;
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(completion_done);
--
1.9.2
Paul Gortmaker
2014-10-18 00:22:57 UTC
Permalink
Per the previous commit, the goal is to explicitly distinguish
between complex wait and simple wait in the names of functions
and structs. We avoid re-using the old namespace, like
"add_wait_foo()", to ensure it is clear which users have actively
chosen which variant they want to use, vs. which ones have just
been grandfathered into using the pre-existing complex variants.

In order to achieve this, we have already done the following:

a) rename existing structs and functions with an additional "c"
to indicate they are the complex variants [limited to wait.h]

b) introduce temporary wait_xyz() ----> cwait_xyz() mappings that will
let us do tree-wide conversions at our leisure (with coccinelle).
The mappings can be disabled with #undef CWAIT_COMPAT for testing.

Here we update the existing core implementation of the complex wait
functions in kernel/sched/wait.c to have the "c" prefix and hence not
rely on (b) above. At the same time we implicitly stop using the
typedefs as we make these name changes.

We also drop "queue" from the names in order to make some of them
slightly less cumbersome, as per the previous commit.

This prepares us for adding swait_xyz() variations into wait.c
alongside the existing cwait_xyz() functions renamed here.
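
To make (a) and (b) concrete, a small illustrative sketch (not part of
the patch; foo_wq and foo_ready are made-up names) of how an unconverted
caller keeps building via the CWAIT_COMPAT defines while a converted
caller has explicitly opted into the complex variant:

/* Sketch only: foo_wq / foo_ready are hypothetical. */
static DECLARE_CWAIT_HEAD(foo_wq);	/* what wait_queue_head_t users become */
static int foo_ready;

/* Unconverted caller: old names resolve through the CWAIT_COMPAT defines. */
static void foo_notify_old(void)
{
	foo_ready = 1;
	if (waitqueue_active(&foo_wq))	/* -> cwait_active() */
		wake_up(&foo_wq);	/* -> cwake_up() */
}

/* Converted caller: has actively chosen the complex variant. */
static void foo_notify_new(void)
{
	foo_ready = 1;
	if (cwait_active(&foo_wq))
		cwake_up(&foo_wq);
}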

Signed-off-by: Paul Gortmaker <***@windriver.com>

diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 15cab1a4f84e..e62bd9c8aaf7 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -10,46 +10,46 @@
#include <linux/wait.h>
#include <linux/hash.h>

-void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
+void __init_cwait_head(struct cwait_head *q, const char *name,
+ struct lock_class_key *key)
{
spin_lock_init(&q->lock);
lockdep_set_class_and_name(&q->lock, key, name);
INIT_LIST_HEAD(&q->task_list);
}
+EXPORT_SYMBOL(__init_cwait_head);

-EXPORT_SYMBOL(__init_waitqueue_head);
-
-void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+void add_cwait(struct cwait_head *q, struct cwait *wait)
{
unsigned long flags;

wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
- __add_wait_queue(q, wait);
+ __add_cwait(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
-EXPORT_SYMBOL(add_wait_queue);
+EXPORT_SYMBOL(add_cwait);

-void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
+void add_cwait_exclusive(struct cwait_head *q, struct cwait *wait)
{
unsigned long flags;

wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
- __add_wait_queue_tail(q, wait);
+ __add_cwait_tail(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
-EXPORT_SYMBOL(add_wait_queue_exclusive);
+EXPORT_SYMBOL(add_cwait_exclusive);

-void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+void remove_cwait(struct cwait_head *q, struct cwait *wait)
{
unsigned long flags;

spin_lock_irqsave(&q->lock, flags);
- __remove_wait_queue(q, wait);
+ __remove_cwait(q, wait);
spin_unlock_irqrestore(&q->lock, flags);
}
-EXPORT_SYMBOL(remove_wait_queue);
+EXPORT_SYMBOL(remove_cwait);


/*
@@ -61,10 +61,10 @@ EXPORT_SYMBOL(remove_wait_queue);
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
* zero in this (rare) case, and we handle it by continuing to scan the queue.
*/
-static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
- int nr_exclusive, int wake_flags, void *key)
+static void __cwake_up_common(struct cwait_head *q, unsigned int mode,
+ int nr_exclusive, int wake_flags, void *key)
{
- wait_queue_t *curr, *next;
+ struct cwait *curr, *next;

list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
unsigned flags = curr->flags;
@@ -76,8 +76,8 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
}

/**
- * __wake_up - wake up threads blocked on a waitqueue.
- * @q: the waitqueue
+ * __cwake_up - wake up threads blocked on a waitqueue.
+ * @q: the complex waitqueue
* @mode: which threads
* @nr_exclusive: how many wake-one or wake-many threads to wake up
* @key: is directly passed to the wakeup function
@@ -85,34 +85,34 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
* It may be assumed that this function implies a write memory barrier before
* changing the task state if and only if any tasks are woken up.
*/
-void __wake_up(wait_queue_head_t *q, unsigned int mode,
- int nr_exclusive, void *key)
+void __cwake_up(struct cwait_head *q, unsigned int mode, int nr_exclusive,
+ void *key)
{
unsigned long flags;

spin_lock_irqsave(&q->lock, flags);
- __wake_up_common(q, mode, nr_exclusive, 0, key);
+ __cwake_up_common(q, mode, nr_exclusive, 0, key);
spin_unlock_irqrestore(&q->lock, flags);
}
-EXPORT_SYMBOL(__wake_up);
+EXPORT_SYMBOL(__cwake_up);

/*
- * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
+ * Same as __cwake_up but called with the spinlock in struct cwait_head held.
*/
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
+void __cwake_up_locked(struct cwait_head *q, unsigned int mode, int nr)
{
- __wake_up_common(q, mode, nr, 0, NULL);
+ __cwake_up_common(q, mode, nr, 0, NULL);
}
-EXPORT_SYMBOL_GPL(__wake_up_locked);
+EXPORT_SYMBOL_GPL(__cwake_up_locked);

-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
+void __cwake_up_locked_key(struct cwait_head *q, unsigned int mode, void *key)
{
- __wake_up_common(q, mode, 1, 0, key);
+ __cwake_up_common(q, mode, 1, 0, key);
}
-EXPORT_SYMBOL_GPL(__wake_up_locked_key);
+EXPORT_SYMBOL_GPL(__cwake_up_locked_key);

/**
- * __wake_up_sync_key - wake up threads blocked on a waitqueue.
+ * __cwake_up_sync_key - wake up threads blocked on a waitqueue.
* @q: the waitqueue
* @mode: which threads
* @nr_exclusive: how many wake-one or wake-many threads to wake up
@@ -128,8 +128,8 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key);
* It may be assumed that this function implies a write memory barrier before
* changing the task state if and only if any tasks are woken up.
*/
-void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
- int nr_exclusive, void *key)
+void __cwake_up_sync_key(struct cwait_head *q, unsigned int mode,
+ int nr_exclusive, void *key)
{
unsigned long flags;
int wake_flags = 1; /* XXX WF_SYNC */
@@ -141,19 +141,19 @@ void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
wake_flags = 0;

spin_lock_irqsave(&q->lock, flags);
- __wake_up_common(q, mode, nr_exclusive, wake_flags, key);
+ __cwake_up_common(q, mode, nr_exclusive, wake_flags, key);
spin_unlock_irqrestore(&q->lock, flags);
}
-EXPORT_SYMBOL_GPL(__wake_up_sync_key);
+EXPORT_SYMBOL_GPL(__cwake_up_sync_key);

/*
- * __wake_up_sync - see __wake_up_sync_key()
+ * __cwake_up_sync - see __cwake_up_sync_key()
*/
-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+void __cwake_up_sync(struct cwait_head *q, unsigned int mode, int nr_exclusive)
{
- __wake_up_sync_key(q, mode, nr_exclusive, NULL);
+ __cwake_up_sync_key(q, mode, nr_exclusive, NULL);
}
-EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
+EXPORT_SYMBOL_GPL(__cwake_up_sync); /* For internal use only */

/*
* Note: we use "set_current_state()" _after_ the wait-queue add,
@@ -167,35 +167,34 @@ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
* stops them from bleeding out - it would still allow subsequent
* loads to move into the critical region).
*/
-void
-prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
+void prepare_to_cwait(struct cwait_head *q, struct cwait *wait, int state)
{
unsigned long flags;

wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
- __add_wait_queue(q, wait);
+ __add_cwait(q, wait);
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
}
-EXPORT_SYMBOL(prepare_to_wait);
+EXPORT_SYMBOL(prepare_to_cwait);

-void
-prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
+void prepare_to_cwait_exclusive(struct cwait_head *q, struct cwait *wait,
+ int state)
{
unsigned long flags;

wait->flags |= WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
- __add_wait_queue_tail(q, wait);
+ __add_cwait_tail(q, wait);
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
}
-EXPORT_SYMBOL(prepare_to_wait_exclusive);
+EXPORT_SYMBOL(prepare_to_cwait_exclusive);

-long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
+long prepare_to_cwait_event(struct cwait_head *q, struct cwait *wait, int state)
{
unsigned long flags;

@@ -203,32 +202,32 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
return -ERESTARTSYS;

wait->private = current;
- wait->func = autoremove_wake_function;
+ wait->func = autoremove_cwake_function;

spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list)) {
if (wait->flags & WQ_FLAG_EXCLUSIVE)
- __add_wait_queue_tail(q, wait);
+ __add_cwait_tail(q, wait);
else
- __add_wait_queue(q, wait);
+ __add_cwait(q, wait);
}
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);

return 0;
}
-EXPORT_SYMBOL(prepare_to_wait_event);
+EXPORT_SYMBOL(prepare_to_cwait_event);

/**
- * finish_wait - clean up after waiting in a queue
- * @q: waitqueue waited on
+ * finish_cwait - clean up after waiting in a queue
+ * @q: complex waitqueue waited on
* @wait: wait descriptor
*
* Sets current thread back to running state and removes
* the wait descriptor from the given waitqueue if still
* queued.
*/
-void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
+void finish_cwait(struct cwait_head *q, struct cwait *wait)
{
unsigned long flags;

@@ -252,10 +251,10 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
spin_unlock_irqrestore(&q->lock, flags);
}
}
-EXPORT_SYMBOL(finish_wait);
+EXPORT_SYMBOL(finish_cwait);

/**
- * abort_exclusive_wait - abort exclusive waiting in a queue
+ * abort_exclusive_cwait - abort exclusive waiting in a queue
* @q: waitqueue waited on
* @wait: wait descriptor
* @mode: runstate of the waiter to be woken
@@ -272,8 +271,8 @@ EXPORT_SYMBOL(finish_wait);
* aborts and is woken up concurrently and no one wakes up
* the next waiter.
*/
-void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
- unsigned int mode, void *key)
+void abort_exclusive_cwait(struct cwait_head *q, struct cwait *wait,
+ unsigned int mode, void *key)
{
unsigned long flags;

@@ -281,36 +280,36 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
spin_lock_irqsave(&q->lock, flags);
if (!list_empty(&wait->task_list))
list_del_init(&wait->task_list);
- else if (waitqueue_active(q))
- __wake_up_locked_key(q, mode, key);
+ else if (cwait_active(q))
+ __cwake_up_locked_key(q, mode, key);
spin_unlock_irqrestore(&q->lock, flags);
}
-EXPORT_SYMBOL(abort_exclusive_wait);
+EXPORT_SYMBOL(abort_exclusive_cwait);

-int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+int autoremove_cwake_function(struct cwait *wait, unsigned mode, int sync,
+ void *key)
{
- int ret = default_wake_function(wait, mode, sync, key);
+ int ret = default_cwake_function(wait, mode, sync, key);

if (ret)
list_del_init(&wait->task_list);
return ret;
}
-EXPORT_SYMBOL(autoremove_wake_function);
+EXPORT_SYMBOL(autoremove_cwake_function);

-int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
+int cwake_bit_function(struct cwait *wait, unsigned mode, int sync, void *arg)
{
- struct wait_bit_key *key = arg;
- struct wait_bit_queue *wait_bit
- = container_of(wait, struct wait_bit_queue, wait);
+ struct cwait_bit_key *key = arg;
+ struct cwait_bit *wait_bit = container_of(wait, struct cwait_bit, wait);

if (wait_bit->key.flags != key->flags ||
wait_bit->key.bit_nr != key->bit_nr ||
test_bit(key->bit_nr, key->flags))
return 0;
else
- return autoremove_wake_function(wait, mode, sync, key);
+ return autoremove_cwake_function(wait, mode, sync, key);
}
-EXPORT_SYMBOL(wake_bit_function);
+EXPORT_SYMBOL(cwake_bit_function);

/*
* To allow interruptible waiting and asynchronous (i.e. nonblocking)
@@ -318,79 +317,79 @@ EXPORT_SYMBOL(wake_bit_function);
* permitted return codes. Nonzero return codes halt waiting and return.
*/
int __sched
-__wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
+__cwait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
wait_bit_action_f *action, unsigned mode)
{
int ret = 0;

do {
- prepare_to_wait(wq, &q->wait, mode);
+ prepare_to_cwait(wq, &q->wait, mode);
if (test_bit(q->key.bit_nr, q->key.flags))
ret = (*action)(&q->key);
} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
- finish_wait(wq, &q->wait);
+ finish_cwait(wq, &q->wait);
return ret;
}
-EXPORT_SYMBOL(__wait_on_bit);
+EXPORT_SYMBOL(__cwait_on_bit);

-int __sched out_of_line_wait_on_bit(void *word, int bit,
+int __sched out_of_line_cwait_on_bit(void *word, int bit,
wait_bit_action_f *action, unsigned mode)
{
- wait_queue_head_t *wq = bit_waitqueue(word, bit);
- DEFINE_WAIT_BIT(wait, word, bit);
+ struct cwait_head *wq = bit_cwaitqueue(word, bit);
+ DEFINE_CWAIT_BIT(wait, word, bit);

- return __wait_on_bit(wq, &wait, action, mode);
+ return __cwait_on_bit(wq, &wait, action, mode);
}
-EXPORT_SYMBOL(out_of_line_wait_on_bit);
+EXPORT_SYMBOL(out_of_line_cwait_on_bit);

int __sched
-__wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
+__cwait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
wait_bit_action_f *action, unsigned mode)
{
do {
int ret;

- prepare_to_wait_exclusive(wq, &q->wait, mode);
+ prepare_to_cwait_exclusive(wq, &q->wait, mode);
if (!test_bit(q->key.bit_nr, q->key.flags))
continue;
ret = action(&q->key);
if (!ret)
continue;
- abort_exclusive_wait(wq, &q->wait, mode, &q->key);
+ abort_exclusive_cwait(wq, &q->wait, mode, &q->key);
return ret;
} while (test_and_set_bit(q->key.bit_nr, q->key.flags));
- finish_wait(wq, &q->wait);
+ finish_cwait(wq, &q->wait);
return 0;
}
-EXPORT_SYMBOL(__wait_on_bit_lock);
+EXPORT_SYMBOL(__cwait_on_bit_lock);

-int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
+int __sched out_of_line_cwait_on_bit_lock(void *word, int bit,
wait_bit_action_f *action, unsigned mode)
{
- wait_queue_head_t *wq = bit_waitqueue(word, bit);
- DEFINE_WAIT_BIT(wait, word, bit);
+ struct cwait_head *wq = bit_cwaitqueue(word, bit);
+ DEFINE_CWAIT_BIT(wait, word, bit);

- return __wait_on_bit_lock(wq, &wait, action, mode);
+ return __cwait_on_bit_lock(wq, &wait, action, mode);
}
-EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
+EXPORT_SYMBOL(out_of_line_cwait_on_bit_lock);

-void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
+void __cwake_up_bit(struct cwait_head *wq, void *word, int bit)
{
- struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
- if (waitqueue_active(wq))
- __wake_up(wq, TASK_NORMAL, 1, &key);
+ struct cwait_bit_key key = CWAIT_BIT_KEY_INITIALIZER(word, bit);
+ if (cwait_active(wq))
+ __cwake_up(wq, TASK_NORMAL, 1, &key);
}
-EXPORT_SYMBOL(__wake_up_bit);
+EXPORT_SYMBOL(__cwake_up_bit);

/**
- * wake_up_bit - wake up a waiter on a bit
+ * cwake_up_bit - wake up a waiter on a bit
* @word: the word being waited on, a kernel virtual address
* @bit: the bit of the word being waited on
*
* There is a standard hashed waitqueue table for generic use. This
* is the part of the hashtable's accessor API that wakes up waiters
* on a bit. For instance, if one were to have waiters on a bitflag,
- * one would call wake_up_bit() after clearing the bit.
+ * one would call cwake_up_bit() after clearing the bit.
*
* In order for this to function properly, as it uses waitqueue_active()
* internally, some kind of memory barrier must be done prior to calling
@@ -399,13 +398,13 @@ EXPORT_SYMBOL(__wake_up_bit);
* may need to use a less regular barrier, such fs/inode.c's smp_mb(),
* because spin_unlock() does not guarantee a memory barrier.
*/
-void wake_up_bit(void *word, int bit)
+void cwake_up_bit(void *word, int bit)
{
- __wake_up_bit(bit_waitqueue(word, bit), word, bit);
+ __cwake_up_bit(bit_cwaitqueue(word, bit), word, bit);
}
-EXPORT_SYMBOL(wake_up_bit);
+EXPORT_SYMBOL(cwake_up_bit);

-wait_queue_head_t *bit_waitqueue(void *word, int bit)
+struct cwait_head *bit_cwaitqueue(void *word, int bit)
{
const int shift = BITS_PER_LONG == 32 ? 5 : 6;
const struct zone *zone = page_zone(virt_to_page(word));
@@ -413,83 +412,84 @@ wait_queue_head_t *bit_waitqueue(void *word, int bit)

return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
}
-EXPORT_SYMBOL(bit_waitqueue);
+EXPORT_SYMBOL(bit_cwaitqueue);

/*
* Manipulate the atomic_t address to produce a better bit waitqueue table hash
* index (we're keying off bit -1, but that would produce a horrible hash
* value).
*/
-static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
+static inline struct cwait_head *atomic_t_cwaitqueue(atomic_t *p)
{
if (BITS_PER_LONG == 64) {
unsigned long q = (unsigned long)p;
- return bit_waitqueue((void *)(q & ~1), q & 1);
+ return bit_cwaitqueue((void *)(q & ~1), q & 1);
}
- return bit_waitqueue(p, 0);
+ return bit_cwaitqueue(p, 0);
}

-static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync,
- void *arg)
+static int cwake_atomic_t_function(struct cwait *wait, unsigned mode, int sync,
+ void *arg)
{
- struct wait_bit_key *key = arg;
- struct wait_bit_queue *wait_bit
- = container_of(wait, struct wait_bit_queue, wait);
+ struct cwait_bit_key *key = arg;
+ struct cwait_bit *wait_bit = container_of(wait, struct cwait_bit, wait);
atomic_t *val = key->flags;

if (wait_bit->key.flags != key->flags ||
wait_bit->key.bit_nr != key->bit_nr ||
atomic_read(val) != 0)
return 0;
- return autoremove_wake_function(wait, mode, sync, key);
+ return autoremove_cwake_function(wait, mode, sync, key);
}

/*
* To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting,
- * the actions of __wait_on_atomic_t() are permitted return codes. Nonzero
+ * the actions of __cwait_on_atomic_t() are permitted return codes. Nonzero
* return codes halt waiting and return.
*/
-static __sched
-int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
- int (*action)(atomic_t *), unsigned mode)
+static __sched int __cwait_on_atomic_t(struct cwait_head *wq,
+ struct cwait_bit *q,
+ int (*action)(atomic_t *),
+ unsigned mode)
{
atomic_t *val;
int ret = 0;

do {
- prepare_to_wait(wq, &q->wait, mode);
+ prepare_to_cwait(wq, &q->wait, mode);
val = q->key.flags;
if (atomic_read(val) == 0)
break;
ret = (*action)(val);
} while (!ret && atomic_read(val) != 0);
- finish_wait(wq, &q->wait);
+ finish_cwait(wq, &q->wait);
return ret;
}

-#define DEFINE_WAIT_ATOMIC_T(name, p) \
- struct wait_bit_queue name = { \
- .key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p), \
+#define DEFINE_CWAIT_ATOMIC_T(name, p) \
+ struct cwait_bit name = { \
+ .key = CWAIT_ATOMIC_T_KEY_INITIALIZER(p), \
.wait = { \
.private = current, \
- .func = wake_atomic_t_function, \
+ .func = cwake_atomic_t_function, \
.task_list = \
LIST_HEAD_INIT((name).wait.task_list), \
}, \
}

-__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
- unsigned mode)
+__sched int out_of_line_cwait_on_atomic_t(atomic_t *p,
+ int (*action)(atomic_t *),
+ unsigned mode)
{
- wait_queue_head_t *wq = atomic_t_waitqueue(p);
- DEFINE_WAIT_ATOMIC_T(wait, p);
+ struct cwait_head *wq = atomic_t_cwaitqueue(p);
+ DEFINE_CWAIT_ATOMIC_T(wait, p);

- return __wait_on_atomic_t(wq, &wait, action, mode);
+ return __cwait_on_atomic_t(wq, &wait, action, mode);
}
-EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
+EXPORT_SYMBOL(out_of_line_cwait_on_atomic_t);

/**
- * wake_up_atomic_t - Wake up a waiter on a atomic_t
+ * cwake_up_atomic_t - Wake up a waiter on a atomic_t
* @p: The atomic_t being waited on, a kernel virtual address
*
* Wake up anyone waiting for the atomic_t to go to zero.
@@ -497,11 +497,11 @@ EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
* Abuse the bit-waker function and its waitqueue hash table set (the atomic_t
* check is done by the waiter's wake function, not the by the waker itself).
*/
-void wake_up_atomic_t(atomic_t *p)
+void cwake_up_atomic_t(atomic_t *p)
{
- __wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
+ __cwake_up_bit(atomic_t_cwaitqueue(p), p, CWAIT_ATOMIC_T_BIT_NR);
}
-EXPORT_SYMBOL(wake_up_atomic_t);
+EXPORT_SYMBOL(cwake_up_atomic_t);

__sched int bit_wait(struct wait_bit_key *word)
{
--
1.9.2
Paul Gortmaker
2014-10-18 00:22:58 UTC
Permalink
The existing wait queue support provides custom wake up callbacks,
wake flags, a wake key (passed to the callback) and exclusive flags
that allow waiters to be tagged as exclusive, for limiting the number
of waiters woken.

In a lot of cases none of these features are used, and hence we
can benefit from a slimmed-down version that lowers both memory
and runtime overhead.

The concept originated in RT, where waitqueues are a constant
source of trouble, as we can't convert the head lock to a raw
spinlock due to fancy and long-lasting callbacks. Unlike the RT
version, which had the support in stand-alone files, here we integrate
it into the existing wait.[ch] files and make it as parallel as possible
to the already in-tree complex wait queue support.

With the removal of custom callbacks, we can use a raw lock for
queue list manipulations, hence allowing the simple wait support
to be used in RT.

The other big difference from the -rt version is that here we
add the code alongside the existing complex waitqueue support, for
ease of maintenance, and to highlight any differences between the
two implementations.
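
For orientation, a minimal usage sketch of the API added below (foo_wq
and foo_done are made-up names, not part of the patch):

/* Sketch only: foo_wq / foo_done are hypothetical. */
static DECLARE_SWAIT_HEAD(foo_wq);
static int foo_done;

/* waiter side: sleep until foo_done becomes true */
static void foo_wait(void)
{
	swait_event(foo_wq, foo_done);
}

/*
 * waker side: set the condition, then wake one (or all) waiters.
 * As with waitqueue_active(), swake_up() checks swait_active()
 * locklessly, so the condition write must be ordered before the wakeup.
 */
static void foo_signal(void)
{
	foo_done = 1;
	swake_up(&foo_wq);		/* or swake_up_all(&foo_wq) */
}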

Originally-by: Thomas Gleixner <***@linutronix.de>
Signed-off-by: Paul Gortmaker <***@windriver.com>

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 526e398cc249..2a57e00250f9 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -31,6 +31,11 @@ struct cwait {
struct list_head task_list;
};

+struct swait {
+ struct task_struct *task;
+ struct list_head node;
+};
+
struct cwait_bit_key {
void *flags;
int bit_nr;
@@ -49,6 +54,11 @@ struct cwait_head {
};
typedef struct cwait_head cwait_head_t;

+struct swait_head {
+ raw_spinlock_t lock;
+ struct list_head task_list;
+};
+
#ifdef CWAIT_COMPAT
#define wait_queue_t cwait_t
#define wait_queue_head_t cwait_head_t
@@ -70,16 +80,30 @@ struct task_struct;
.func = default_cwake_function, \
.task_list = { NULL, NULL } }

+#define SWAIT_INITIALIZER(name, tsk) { \
+ .task = current, \
+ .node = LIST_HEAD_INIT((name).node) }
+
#define DECLARE_CWAIT(name, tsk) \
struct cwait name = CWAIT_INITIALIZER(name, tsk)

+#define DECLARE_SWAIT(name) \
+ struct swait name = SWAIT_INITIALIZER(name, tsk)
+
#define CWAIT_HEAD_INITIALIZER(name) { \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
.task_list = { &(name).task_list, &(name).task_list } }

+#define SWAIT_HEAD_INITIALIZER(name) { \
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
+ .task_list = { &(name).task_list, &(name).task_list } }
+
#define DECLARE_CWAIT_HEAD(name) \
struct cwait_head name = CWAIT_HEAD_INITIALIZER(name)

+#define DECLARE_SWAIT_HEAD(name) \
+ struct swait_head name = SWAIT_HEAD_INITIALIZER(name)
+
#define CWAIT_BIT_KEY_INITIALIZER(word, bit) \
{ .flags = word, .bit_nr = bit, }

@@ -89,6 +113,9 @@ struct task_struct;
extern void __init_cwait_head(struct cwait_head *q, const char *name,
struct lock_class_key *);

+extern void __init_swait_head(struct swait_head *q, const char *name,
+ struct lock_class_key *);
+
#define init_cwait_head(q) \
do { \
static struct lock_class_key __key; \
@@ -96,13 +123,25 @@ extern void __init_cwait_head(struct cwait_head *q, const char *name,
__init_cwait_head((q), #q, &__key); \
} while (0)

+#define init_swait_head(q) \
+ do { \
+ static struct lock_class_key __key; \
+ \
+ __init_swait_head((q), #q, &__key); \
+ } while (0)
+
#ifdef CONFIG_LOCKDEP
# define CWAIT_HEAD_INIT_ONSTACK(name) \
({ init_cwait_head(&name); name; })
+# define SWAIT_HEAD_INIT_ONSTACK(name) \
+ ({ init_swait_head(&name); name; })
# define DECLARE_CWAIT_HEAD_ONSTACK(name) \
struct cwait_head name = CWAIT_HEAD_INIT_ONSTACK(name)
+# define DECLARE_SWAIT_HEAD_ONSTACK(name) \
+ struct swait_head name = SWAIT_HEAD_INIT_ONSTACK(name)
#else
# define DECLARE_CWAIT_HEAD_ONSTACK(name) DECLARE_CWAIT_HEAD(name)
+# define DECLARE_SWAIT_HEAD_ONSTACK(name) DECLARE_SWAIT_HEAD(name)
#endif

static inline void init_cwait_entry(struct cwait *q, struct task_struct *p)
@@ -140,9 +179,16 @@ static inline int cwait_active(struct cwait_head *q)
return !list_empty(&q->task_list);
}

+static inline int swait_active(struct swait_head *q)
+{
+ return !list_empty(&q->task_list);
+}
+
extern void add_cwait(struct cwait_head *q, struct cwait *wait);
+extern void add_swait(struct swait_head *q, struct swait *wait);
extern void add_cwait_exclusive(struct cwait_head *q, struct cwait *wait);
extern void remove_cwait(struct cwait_head *q, struct cwait *wait);
+extern void remove_swait(struct swait_head *q, struct swait *wait);

#ifdef CWAIT_COMPAT
#define waitqueue_active cwait_active
@@ -156,6 +202,11 @@ static inline void __add_cwait(struct cwait_head *head, struct cwait *new)
list_add(&new->task_list, &head->task_list);
}

+static inline void __add_swait(struct swait_head *head, struct swait *new)
+{
+ list_add(&new->node, &head->task_list);
+}
+
/*
* Used for wake-one threads:
*/
@@ -172,6 +223,12 @@ static inline void __add_cwait_tail(struct cwait_head *head,
list_add_tail(&new->task_list, &head->task_list);
}

+static inline void __add_swait_tail(struct swait_head *head,
+ struct swait *new)
+{
+ list_add_tail(&new->node, &head->task_list);
+}
+
static inline void __add_cwait_tail_exclusive(struct cwait_head *q,
struct cwait *wait)
{
@@ -185,6 +242,12 @@ __remove_cwait(struct cwait_head *head, struct cwait *old)
list_del(&old->task_list);
}

+static inline void
+__remove_swait(struct swait_head *head, struct swait *old)
+{
+ list_del_init(&old->node);
+}
+
#ifdef CWAIT_COMPAT
#define __add_wait_queue __add_cwait
#define __remove_wait_queue __remove_cwait
@@ -195,9 +258,11 @@ __remove_cwait(struct cwait_head *head, struct cwait *old)

typedef int cwait_bit_action_f(struct wait_bit_key *);
void __cwake_up(struct cwait_head *q, unsigned int mode, int nr, void *key);
+void __swake_up(struct swait_head *q, unsigned int mode, int nr);
void __cwake_up_locked_key(struct cwait_head *q, unsigned int mode, void *key);
void __cwake_up_sync_key(struct cwait_head *q, unsigned int mode, int nr, void *key);
void __cwake_up_locked(struct cwait_head *q, unsigned int mode, int nr);
+void __swake_up_locked(struct swait_head *q, unsigned int mode, int nr);
void __cwake_up_sync(struct cwait_head *q, unsigned int mode, int nr);
void __cwake_up_bit(struct cwait_head *, void *, int);
int __cwait_on_bit(struct cwait_head *, struct cwait_bit *, cwait_bit_action_f *, unsigned);
@@ -223,10 +288,15 @@ int out_of_line_cwait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned);
struct cwait_head *bit_cwaitqueue(void *, int);

#define cwake_up(x) __cwake_up(x, TASK_NORMAL, 1, NULL)
+#define swake_up(x) __swake_up(x, TASK_NORMAL, 1)
#define cwake_up_nr(x, nr) __cwake_up(x, TASK_NORMAL, nr, NULL)
+#define swake_up_nr(x, nr) __swake_up(x, TASK_NORMAL, nr)
#define cwake_up_all(x) __cwake_up(x, TASK_NORMAL, 0, NULL)
+#define swake_up_all(x) __swake_up(x, TASK_NORMAL, 0)
#define cwake_up_locked(x) __cwake_up_locked((x), TASK_NORMAL, 1)
+#define swake_up_locked(x) __swake_up_locked((x), TASK_NORMAL, 1)
#define cwake_up_all_locked(x) __cwake_up_locked((x), TASK_NORMAL, 0)
+#define swake_up_all_locked(x) __swake_up_locked((x), TASK_NORMAL, 0)

#ifdef CWAIT_COMPAT
#define wake_up cwake_up
@@ -337,10 +407,39 @@ struct cwait_head *bit_cwaitqueue(void *, int);
__out: __ret; \
})

+/* as above but for swait, and hence with implied "exclusive == 0" */
+#define ___swait_event(wq, condition, state, ret, cmd) \
+({ \
+ struct swait __wait; \
+ long __ret = ret; \
+ \
+ INIT_LIST_HEAD(&__wait.node); \
+ for (;;) { \
+ long __int = prepare_to_swait_event(&wq, &__wait, state);\
+ \
+ if (condition) \
+ break; \
+ \
+ if (___wait_is_interruptible(state) && __int) { \
+ __ret = __int; \
+ break; \
+ } \
+ \
+ cmd; \
+ } \
+ finish_swait(&wq, &__wait); \
+ __ret; \
+})
+
+
#define __cwait_event(wq, condition) \
(void)___cwait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
schedule())

+#define __swait_event(wq, condition) \
+ (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \
+ schedule())
+
/**
* cwait_event - sleep until a condition gets true
* @wq: the complex waitqueue to wait on
@@ -360,11 +459,23 @@ do { \
__cwait_event(wq, condition); \
} while (0)

+#define swait_event(wq, condition) \
+do { \
+ if (condition) \
+ break; \
+ __swait_event(wq, condition); \
+} while (0)
+
#define __cwait_event_timeout(wq, condition, timeout) \
___cwait_event(wq, ___wait_cond_timeout(condition), \
TASK_UNINTERRUPTIBLE, 0, timeout, \
__ret = schedule_timeout(__ret))

+#define __swait_event_timeout(wq, condition, timeout) \
+ ___swait_event(wq, ___wait_cond_timeout(condition), \
+ TASK_UNINTERRUPTIBLE, timeout, \
+ __ret = schedule_timeout(__ret))
+
/**
* cwait_event_timeout - sleep until a condition gets true or a timeout elapses
* @wq: the complex waitqueue to wait on
@@ -390,10 +501,22 @@ do { \
__ret; \
})

+#define swait_event_timeout(wq, condition, timeout) \
+({ \
+ long __ret = timeout; \
+ if (!___wait_cond_timeout(condition)) \
+ __ret = __swait_event_timeout(wq, condition, timeout); \
+ __ret; \
+})
+
#define __cwait_event_cmd(wq, condition, cmd1, cmd2) \
(void)___cwait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
cmd1; schedule(); cmd2)

+#define __swait_event_cmd(wq, condition, cmd1, cmd2) \
+ (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \
+ cmd1; schedule(); cmd2)
+
/**
* cwait_event_cmd - sleep until a condition gets true
* @wq: the complex waitqueue to wait on
@@ -415,10 +538,21 @@ do { \
__cwait_event_cmd(wq, condition, cmd1, cmd2); \
} while (0)

+#define swait_event_cmd(wq, condition, cmd1, cmd2) \
+do { \
+ if (condition) \
+ break; \
+ __swait_event_cmd(wq, condition, cmd1, cmd2); \
+} while (0)
+
#define __cwait_event_interruptible(wq, condition) \
___cwait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
schedule())

+#define __swait_event_interruptible(wq, condition) \
+ ___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0, \
+ schedule())
+
/**
* cwait_event_interruptible - sleep until a condition gets true
* @wq: the complex waitqueue to wait on
@@ -442,11 +576,24 @@ do { \
__ret; \
})

+#define swait_event_interruptible(wq, condition) \
+({ \
+ int __ret = 0; \
+ if (!(condition)) \
+ __ret = __swait_event_interruptible(wq, condition); \
+ __ret; \
+})
+
#define __cwait_event_interruptible_timeout(wq, condition, timeout) \
___cwait_event(wq, ___wait_cond_timeout(condition), \
TASK_INTERRUPTIBLE, 0, timeout, \
__ret = schedule_timeout(__ret))

+#define __swait_event_interruptible_timeout(wq, condition, timeout) \
+ ___swait_event(wq, ___wait_cond_timeout(condition), \
+ TASK_INTERRUPTIBLE, timeout, \
+ __ret = schedule_timeout(__ret))
+
/**
* cwait_event_interruptible_timeout - sleep until a condition gets true or a
* timeout elapses
@@ -475,6 +622,15 @@ do { \
__ret; \
})

+#define swait_event_interruptible_timeout(wq, condition, timeout) \
+({ \
+ long __ret = timeout; \
+ if (!___wait_cond_timeout(condition)) \
+ __ret = __swait_event_interruptible_timeout(wq, \
+ condition, timeout); \
+ __ret; \
+})
+
#define __cwait_event_hrtimeout(wq, condition, timeout, state) \
({ \
int __ret = 0; \
@@ -500,6 +656,8 @@ do { \
__ret; \
})

+/* no __swait_event_hrtimeout yet, as the cwait version has zero users */
+
/**
* cwait_event_hrtimeout - sleep until a condition gets true or a
* timeout elapses
@@ -957,9 +1115,14 @@ do { \
* Waitqueues which are removed from the waitqueue_head at wakeup time
*/
void prepare_to_cwait(struct cwait_head *q, struct cwait *wait, int state);
+void prepare_to_swait(struct swait_head *q, struct swait *wait, int state);
+void __prepare_to_swait(struct swait_head *q, struct swait *wait);
void prepare_to_cwait_exclusive(struct cwait_head *q, struct cwait *wait, int state);
long prepare_to_cwait_event(struct cwait_head *q, struct cwait *wait, int state);
+long prepare_to_swait_event(struct swait_head *q, struct swait *wait, int state);
void finish_cwait(struct cwait_head *q, struct cwait *wait);
+void finish_swait(struct swait_head *q, struct swait *wait);
+void __finish_swait(struct swait_head *q, struct swait *wait);
void abort_exclusive_cwait(struct cwait_head *q, struct cwait *wait, unsigned int mode, void *key);
int autoremove_cwake_function(struct cwait *wait, unsigned mode, int sync, void *key);
int cwake_bit_function(struct cwait *wait, unsigned mode, int sync, void *key);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index e62bd9c8aaf7..634427c25945 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -2,7 +2,25 @@
* Generic waiting primitives.
*
* (C) 2004 Nadia Yvette Chambers, Oracle
+ *
+ * There are two different types of wait queues, the complex ones and the
+ * simple ones. The complex ones are the original implementation, with
+ * custom callbacks and waiter specific flags (EXCLUSIVE). But most users
+ * simply use the default (try_to_wake_up) callback and don't bother with
+ * any exclusive wakeup filtering, or key passing.
+ *
+ * Given that, we added simple wait queue support, which doesn't support
+ * custom callbacks or exclusive flags. This reduces the associated struct
+ * sizes. The simple wait queue manipulations are also done under the
+ * protection of a raw lock, which enables them to be used for general
+ * infrastructural tasks (completions, RCU, etc.) in a preempt-rt kernel.
+ *
+ * The two implementations exist as cwait_XYZ() and swait_XYZ(), and they
+ * parallel each other as much as possible. Evidently there are some cases
+ * where no such swait parallel function exists -- as in the case for the
+ * exclusive variants of the cwait functions.
*/
+
#include <linux/init.h>
#include <linux/export.h>
#include <linux/sched.h>
@@ -19,6 +37,16 @@ void __init_cwait_head(struct cwait_head *q, const char *name,
}
EXPORT_SYMBOL(__init_cwait_head);

+void __init_swait_head(struct swait_head *q, const char *name,
+ struct lock_class_key *key)
+{
+ raw_spin_lock_init(&q->lock);
+ lockdep_set_class_and_name(&q->lock, key, name);
+ INIT_LIST_HEAD(&q->task_list);
+}
+EXPORT_SYMBOL(__init_swait_head);
+
+
void add_cwait(struct cwait_head *q, struct cwait *wait)
{
unsigned long flags;
@@ -30,6 +58,16 @@ void add_cwait(struct cwait_head *q, struct cwait *wait)
}
EXPORT_SYMBOL(add_cwait);

+void add_swait(struct swait_head *q, struct swait *wait)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&q->lock, flags);
+ __add_swait(q, wait);
+ raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(add_swait);
+
void add_cwait_exclusive(struct cwait_head *q, struct cwait *wait)
{
unsigned long flags;
@@ -51,6 +89,16 @@ void remove_cwait(struct cwait_head *q, struct cwait *wait)
}
EXPORT_SYMBOL(remove_cwait);

+void remove_swait(struct swait_head *q, struct swait *wait)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&q->lock, flags);
+ __remove_swait(q, wait);
+ raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(remove_swait);
+

/*
* The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
@@ -75,6 +123,32 @@ static void __cwake_up_common(struct cwait_head *q, unsigned int mode,
}
}

+static void __swake_up_common(struct swait_head *q, unsigned int mode,
+ int nr_exclusive)
+{
+ struct swait *curr, *next;
+ int woken = 0;
+
+ list_for_each_entry_safe(curr, next, &q->task_list, node) {
+ if (wake_up_state(curr->task, mode)) { /* <-- calls ttwu() */
+ __remove_swait(q, curr);
+ curr->task = NULL;
+ /*
+ * The waiting task can free the waiter as
+ * soon as curr->task = NULL is written,
+ * without taking any locks. A memory barrier
+ * is required here to prevent the following
+ * store to curr->task from getting ahead of
+ * the dequeue operation.
+ */
+ smp_wmb();
+ if (++woken == nr_exclusive)
+ break;
+ }
+
+ }
+}
+
/**
* __cwake_up - wake up threads blocked on a waitqueue.
* @q: the complex waitqueue
@@ -96,6 +170,19 @@ void __cwake_up(struct cwait_head *q, unsigned int mode, int nr_exclusive,
}
EXPORT_SYMBOL(__cwake_up);

+void __swake_up(struct swait_head *q, unsigned int mode, int nr_exclusive)
+{
+ unsigned long flags;
+
+ if (!swait_active(q))
+ return;
+
+ raw_spin_lock_irqsave(&q->lock, flags);
+ __swake_up_common(q, mode, nr_exclusive);
+ raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(__swake_up);
+
/*
* Same as __cwake_up but called with the spinlock in struct cwait_head held.
*/
@@ -105,6 +192,15 @@ void __cwake_up_locked(struct cwait_head *q, unsigned int mode, int nr)
}
EXPORT_SYMBOL_GPL(__cwake_up_locked);

+void __swake_up_locked(struct swait_head *q, unsigned int state, int nr)
+{
+ if (!swait_active(q))
+ return;
+
+ __swake_up_common(q, state, nr);
+}
+EXPORT_SYMBOL_GPL(__swake_up_locked);
+
void __cwake_up_locked_key(struct cwait_head *q, unsigned int mode, void *key)
{
__cwake_up_common(q, mode, 1, 0, key);
@@ -180,6 +276,24 @@ void prepare_to_cwait(struct cwait_head *q, struct cwait *wait, int state)
}
EXPORT_SYMBOL(prepare_to_cwait);

+void __prepare_to_swait(struct swait_head *q, struct swait *wait)
+{
+ wait->task = current;
+ if (list_empty(&wait->node))
+ __add_swait(q, wait);
+}
+
+void prepare_to_swait(struct swait_head *q, struct swait *wait, int state)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&q->lock, flags);
+ __prepare_to_swait(q, wait);
+ set_current_state(state);
+ raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_swait);
+
void prepare_to_cwait_exclusive(struct cwait_head *q, struct cwait *wait,
int state)
{
@@ -218,6 +332,17 @@ long prepare_to_cwait_event(struct cwait_head *q, struct cwait *wait, int state)
}
EXPORT_SYMBOL(prepare_to_cwait_event);

+long prepare_to_swait_event(struct swait_head *q, struct swait *wait, int state)
+{
+ if (signal_pending_state(state, current))
+ return -ERESTARTSYS;
+
+ prepare_to_swait(q, wait, state);
+
+ return 0;
+}
+EXPORT_SYMBOL(prepare_to_swait_event);
+
/**
* finish_cwait - clean up after waiting in a queue
* @q: complex waitqueue waited on
@@ -253,6 +378,24 @@ void finish_cwait(struct cwait_head *q, struct cwait *wait)
}
EXPORT_SYMBOL(finish_cwait);

+void __finish_swait(struct swait_head *q, struct swait *wait)
+{
+ __set_current_state(TASK_RUNNING);
+
+ if (wait->task)
+ __remove_swait(q, wait);
+}
+
+void finish_swait(struct swait_head *q, struct swait *wait)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&q->lock, flags);
+ __finish_swait(q, wait);
+ raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(finish_swait);
+
/**
* abort_exclusive_cwait - abort exclusive waiting in a queue
* @q: waitqueue waited on
--
1.9.2
Peter Zijlstra
2014-10-18 21:34:17 UTC
Permalink
Post by Paul Gortmaker
@@ -75,6 +123,32 @@ static void __cwake_up_common(struct cwait_head *q, unsigned int mode,
}
}
+static void __swake_up_common(struct swait_head *q, unsigned int mode,
+ int nr_exclusive)
+{
+ struct swait *curr, *next;
+ int woken = 0;
+
+ list_for_each_entry_safe(curr, next, &q->task_list, node) {
+ if (wake_up_state(curr->task, mode)) { /* <-- calls ttwu() */
+ __remove_swait(q, curr);
+ curr->task = NULL;
+ /*
+ * The waiting task can free the waiter as
+ * soon as curr->task = NULL is written,
+ * without taking any locks. A memory barrier
+ * is required here to prevent the following
+ * store to curr->task from getting ahead of
+ * the dequeue operation.
+ */
+ smp_wmb();
+ if (++woken == nr_exclusive)
+ break;
+ }
+
+ }
+}
+
/**
* __cwake_up - wake up threads blocked on a waitqueue.
@@ -96,6 +170,19 @@ void __cwake_up(struct cwait_head *q, unsigned int mode, int nr_exclusive,
}
EXPORT_SYMBOL(__cwake_up);
+void __swake_up(struct swait_head *q, unsigned int mode, int nr_exclusive)
+{
+ unsigned long flags;
+
+ if (!swait_active(q))
+ return;
+
+ raw_spin_lock_irqsave(&q->lock, flags);
+ __swake_up_common(q, mode, nr_exclusive);
+ raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(__swake_up);
Same comment as before, that is an unbounded loop in a non preemptible
section and therefore violates RT design principles.

We actually did talk about ways of fixing that.

Also, I'm not entirely sure we want to do the cwait thing, it looks
painful.
Steven Rostedt
2014-10-18 23:05:19 UTC
Permalink
Post by Peter Zijlstra
Same comment as before, that is an unbounded loop in a non preemptible
section and therefore violates RT design principles.
We actually did talk about ways of fixing that.
Right, and we should slap Paul for not showing up for it ;-)

The decision that we came up with was to splice the current list onto a
local list variable. And then we could go into a loop releasing the lock
and grabbing it again. Each time, pop a waiter off the list and do the
work of only one task at a time. This prevents doing large amounts of
wake ups under a spinlock. The splice is required to only wake up those
that are on the list when the wake up is called. This prevents waking up
a task twice because it woke up, removed itself, and then added itself
again. We must keep the semantics that a wake up only wakes up a task
once.
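
A rough sketch of that shape (illustrative only, not from the thread),
assuming the struct swait_head / struct swait types from this series;
the function name and the waiter-side handshake details are made up:

/*
 * Sketch only: splice the waiters onto a local list under the raw lock,
 * then wake them one at a time, dropping the lock between wakeups so the
 * non-preemptible section stays bounded.  Waiters that wake up some other
 * way in the meantime unlink themselves (under the same lock) before we
 * reach them.
 */
static void swake_up_all_sketch(struct swait_head *q)
{
	unsigned long flags;
	LIST_HEAD(tmp);
	struct swait *curr;

	raw_spin_lock_irqsave(&q->lock, flags);
	/* later arrivals are not our problem; they need their own wake_up */
	list_splice_init(&q->task_list, &tmp);

	while (!list_empty(&tmp)) {
		curr = list_first_entry(&tmp, struct swait, node);

		wake_up_state(curr->task, TASK_NORMAL);
		list_del_init(&curr->node);
		curr->task = NULL;	/* waiter may free itself after this */
		smp_wmb();

		/* one wakeup per lock hold keeps the critical section small */
		raw_spin_unlock_irqrestore(&q->lock, flags);
		raw_spin_lock_irqsave(&q->lock, flags);
	}
	raw_spin_unlock_irqrestore(&q->lock, flags);
}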
Post by Peter Zijlstra
Also, I'm not entirely sure we want to do the cwait thing, it looks
painful.
Yeah, I have to think about that some more too. I'm currently sitting in
the airport waiting for my final leg of my flight. After 18 hours of
travel, it is probably not too wise to review this work in my current
state ;-)

-- Steve
Paul Gortmaker
2014-10-20 15:21:44 UTC
Permalink
Post by Steven Rostedt
Post by Peter Zijlstra
Same comment as before, that is an unbounded loop in a non preemptible
section and therefore violates RT design principles.
We actually did talk about ways of fixing that.
Right, and we should slap Paul for not showing up for it ;-)
And miss turkey day? ;-)
Post by Steven Rostedt
The decision that we came up with was to splice the current list onto a
local list variable. And then we could go into a loop releasing the lock
and grabbing it again. Each time pop a waiter off the list and doing the
work of only one task at a time. This prevents doing large amounts of
wake ups under a spinlock. The splice is required to only wake up those
that are on the list when the wake up is called. This prevents waking up
a task twice because it woke up, removed itself, and then added itself
again. We must keep the semantics that a wake up only wakes up a task
once.
OK, amusingly enough, when we were actively discussing this some time ago,
I'd played with something similar -- I'd created a shadow list, and then
abstracted out the lock/unlock, so that we could call a synchronize_wait
on the unlock operations. What I didn't do was try to use the same
lock for the shadow list and the main one, and lockdep never let me live
that down, so as tglx would say, I shoved it all in the horror closet.

I'd like to hear more details on what you had in mind here, so I don't
go chasing down the wrong road. So the local list head gets all the
items (via list_cut or moves?) and then that local list is spliced onto
the (now temporarily empty) main list head? (presumably all under lock)

What would need to be done as an unwind at the end of processing the
local list head before it disappears from existence? Anything?
Post by Steven Rostedt
Post by Peter Zijlstra
Also, I'm not entirely sure we want to do the cwait thing, it looks
painful.
Yeah, I have to think about that some more too. I'm currently sitting in
the airport waiting for my final leg of my flight. After 18 hours of
travel, it is probably not too wise to review this work in my current
state ;-)
The alignment/parallel of existing mainline wait code seemed like the
consensus back ages ago when this was being discussed on IRC, but if
that has since changed, then I can adapt or abandon as required. I long
ago learned that the time spent on something has no correlation to its
fitness or probability of being ready for addition to mainline. :-)

Thanks,
Paul.
Steven Rostedt
2014-10-20 15:40:13 UTC
Permalink
On Mon, 20 Oct 2014 11:21:44 -0400
Post by Paul Gortmaker
Post by Steven Rostedt
Right, and we should slap Paul for not showing up for it ;-)
And miss turkey day? ;-)
Replace it with Alt Beer day!
Post by Paul Gortmaker
I'd like to hear more details on what you had in mind here, so I don't
go chasing down the wrong road. So the local list head gets all the
items (via list_cut or moves?) and then that local list is spliced onto
the (now temporarily empty) main list head? (presumably all under lock)
No. You move the items off the main list head and add them to the local
list and they never go back. Just start processing that local list.
Anything added to the main list after that will not get woken up by
that current wake_all call. It will need to be woken by another wake_up.
Post by Paul Gortmaker
What would need to be done as an unwind at the end of processing the
local list head before it disappears from existence? Anything?
Not sure what you mean here.
Post by Paul Gortmaker
Post by Steven Rostedt
Post by Peter Zijlstra
Also, I'm not entirely sure we want to do the cwait thing, it looks
painful.
Yeah, I have to think about that some more too. I'm currently sitting in
the airport waiting for my final leg of my flight. After 18 hours of
travel, it is probably not too wise to review this work in my current
state ;-)
The alignment/parallel of existing mainline wait code seemed like the
consensus back ages ago when this was being discussed on IRC, but if
that has since changed, then I can adapt or abandon as required. I long
ago learned that the time spent on something has no correlation to its
fitness or probability of being ready for addition to mainline. :-)
heh, yeah. I'm guessing the point of all that is that anyone using the
wake_queue() will default to the simple version? And then one would
have to specify the complex version explicitly? But to do that we make
all these funny steps? May be OK, I still haven't spent much thought
on it.

-- Steve
Paul Gortmaker
2014-10-20 16:05:42 UTC
Permalink
Post by Steven Rostedt
On Mon, 20 Oct 2014 11:21:44 -0400
Post by Paul Gortmaker
Post by Steven Rostedt
Right, and we should slap Paul for not showing up for it ;-)
And miss turkey day? ;-)
Replace it with Alt Beer day!
Post by Paul Gortmaker
I'd like to hear more details on what you had in mind here, so I don't
go chasing down the wrong road. So the local list head gets all the
items (via list_cut or moves?) and then that local list is spliced onto
the (now temporarily empty) main list head? (presumably all under lock)
No. You move the items off the main list head and add it to the local
list and they never go back. Just start processing that local list.
Anything added to the main list after that will not get woken up by
that current wake_all call. It will need to be woken by another wake_up.
OK. But we may not run all of the wakeups, because of:

+ if (++woken == nr_exclusive)
+ break;
Post by Steven Rostedt
Post by Paul Gortmaker
What would need to be done as an unwind at the end of processing the
local list head before it disappears from existence? Anything?
Not sure what you mean here.
Per above -- can't there be "orphaned" entries that only exist on the
local list head that didn't get processed? What happens to those?
Post by Steven Rostedt
Post by Paul Gortmaker
Post by Steven Rostedt
Post by Peter Zijlstra
Also, I'm not entirely sure we want to do the cwait thing, it looks
painful.
Yeah, I have to think about that some more too. I'm currently sitting in
the airport waiting for my final leg of my flight. After 18 hours of
travel, it is probably not too wise to review this work in my current
state ;-)
The alignment/parallel of existing mainline wait code seemed like the
consensus back ages ago when this was being discussed on IRC, but if
that has since changed, then I can adapt or abandon as required. I long
ago learned that the time spent on something has no correlation to its
fitness or probability of being ready for addition to mainline. :-)
heh, yeah. I'm guessing the point of all that is that anyone using the
wake_queue() will default to the simple version? And then one would
have to specify the complex version explicitly? But to do that we make
all these funny steps? May be OK, I still haven't spent much thought
on it.
Yeah, that is pretty much it. But the way it is done here lets us get
there without requiring tree-wide changes or flag-day type changes
right out of the gate (which IMO tend to hinder adoption/integration).

P.
Steven Rostedt
2014-10-20 16:47:16 UTC
Permalink
On Mon, 20 Oct 2014 12:05:42 -0400
Post by Paul Gortmaker
Post by Steven Rostedt
No. You move the items off the main list head and add it to the local
list and they never go back. Just start processing that local list.
Anything added to the main list after that will not get woken up by
that current wake_all call. It will need to be woken by another wake_up.
+ if (++woken == nr_exclusive)
+ break;
Post by Steven Rostedt
Post by Paul Gortmaker
What would need to be done as an unwind at the end of processing the
local list head before it disappears from existence? Anything?
Not sure what you mean here.
Per above -- can't there be "orphaned" entries that only exist on the
local list head that didn't get processed? What happens to those?
Why not just take nr_exclusive tasks off the main list and add
those to the local list, and then wake up all on the local list?
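
One way to picture that (illustrative only, not from the thread): the
detach step is bounded by nr_exclusive, and the wake loop from the
earlier sketch then runs over exactly that local list, so nothing can
be orphaned:

/*
 * Sketch only: under q->lock, move at most nr_exclusive waiters onto the
 * caller's local list; nr_exclusive == 0 keeps the existing "wake all"
 * meaning.  The caller then wakes everything it detached, dropping the
 * lock between wakeups as in the earlier sketch.
 */
static void swait_detach_sketch(struct swait_head *q, struct list_head *done,
				int nr_exclusive)
{
	struct swait *curr, *next;
	int moved = 0;

	list_for_each_entry_safe(curr, next, &q->task_list, node) {
		list_move_tail(&curr->node, done);
		if (++moved == nr_exclusive)
			break;
	}
}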

-- Steve
Paul Gortmaker
2014-10-20 13:44:58 UTC
Permalink
Post by Peter Zijlstra
Post by Paul Gortmaker
@@ -75,6 +123,32 @@ static void __cwake_up_common(struct cwait_head *q, unsigned int mode,
}
}
+static void __swake_up_common(struct swait_head *q, unsigned int mode,
+ int nr_exclusive)
+{
+ struct swait *curr, *next;
+ int woken = 0;
+
+ list_for_each_entry_safe(curr, next, &q->task_list, node) {
+ if (wake_up_state(curr->task, mode)) { /* <-- calls ttwu() */
+ __remove_swait(q, curr);
+ curr->task = NULL;
+ /*
+ * The waiting task can free the waiter as
+ * soon as curr->task = NULL is written,
+ * without taking any locks. A memory barrier
+ * is required here to prevent the following
+ * store to curr->task from getting ahead of
+ * the dequeue operation.
+ */
+ smp_wmb();
+ if (++woken == nr_exclusive)
+ break;
+ }
+
+ }
+}
+
/**
* __cwake_up - wake up threads blocked on a waitqueue.
@@ -96,6 +170,19 @@ void __cwake_up(struct cwait_head *q, unsigned int mode, int nr_exclusive,
}
EXPORT_SYMBOL(__cwake_up);
+void __swake_up(struct swait_head *q, unsigned int mode, int nr_exclusive)
+{
+ unsigned long flags;
+
+ if (!swait_active(q))
+ return;
+
+ raw_spin_lock_irqsave(&q->lock, flags);
+ __swake_up_common(q, mode, nr_exclusive);
+ raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(__swake_up);
Same comment as before, that is an unbounded loop in a non preemptible
section and therefore violates RT design principles.
Yep, I hadn't forgotten about that; see patch 6/7 -- which has your
tentative solution from before. I didn't want to squish that into
here and lose sight of it; same for the smp barriers - I wanted
to ensure we didn't lose visibility of things needing discussion.
Post by Peter Zijlstra
We actually did talk about ways of fixing that.
I'll follow up to Steve's comment on what he described.
Post by Peter Zijlstra
Also, I'm not entirely sure we want to do the cwait thing, it looks
painful.
The simplewait vs. complex wait as a whole, or just the rework to
make it more aligned with the existing code? FWIW, I'm not married
to this particular implementation; so if ideas have changed since,
and the plan is different than what v2 implements, that is no problem.

P.
Paul Gortmaker
2014-10-18 00:23:00 UTC
Permalink
As of commit dae6e64d2bcfd4b06304ab864c7e3a4f6b5fedf4 ("rcu: Introduce
proper blocking to no-CBs kthreads GP waits") the RCU subsystem started
making use of wait queues.

Here we convert all of RCU's wait queue users to simple wait queues,
since they don't need the extra features (and overhead) of the full wait queues.

Originally this was done for RT kernels[1], since we would get things like...

BUG: sleeping function called from invalid context at kernel/rtmutex.c:659
in_atomic(): 1, irqs_disabled(): 1, pid: 8, name: rcu_preempt
Pid: 8, comm: rcu_preempt Not tainted
Call Trace:
[<ffffffff8106c8d0>] __might_sleep+0xd0/0xf0
[<ffffffff817d77b4>] rt_spin_lock+0x24/0x50
[<ffffffff8106fcf6>] __wake_up+0x36/0x70
[<ffffffff810c4542>] rcu_gp_kthread+0x4d2/0x680
[<ffffffff8105f910>] ? __init_waitqueue_head+0x50/0x50
[<ffffffff810c4070>] ? rcu_gp_fqs+0x80/0x80
[<ffffffff8105eabb>] kthread+0xdb/0xe0
[<ffffffff8106b912>] ? finish_task_switch+0x52/0x100
[<ffffffff817e0754>] kernel_thread_helper+0x4/0x10
[<ffffffff8105e9e0>] ? __init_kthread_worker+0x60/0x60
[<ffffffff817e0750>] ? gs_change+0xb/0xb

...and hence simple wait queues were deployed on RT out of necessity
(as simple wait uses a raw lock), but mainline might as well take
advantage of the more streamlined support too.

[1] This is a carry-forward of work from v3.10-rt; the original conversion
was by Thomas on an earlier -rt version, and Sebastian extended it to the
additional RCU waiters added after 3.10; here I've added a commit log,
unified the RCU changes into one patch, and uprev'd it to match mainline RCU.

Cc: Thomas Gleixner <***@linutronix.de>
Cc: Sebastian Andrzej Siewior <***@linutronix.de>
Cc: Paul E. McKenney <***@linux.vnet.ibm.com>
Cc: Steven Rostedt <***@goodmis.org>
Signed-off-by: Paul Gortmaker <***@windriver.com>

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1b70cb6fbe3c..b8dc49c6bd4d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1398,7 +1398,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
!ACCESS_ONCE(rsp->gp_flags) ||
!rsp->gp_kthread)
return;
- wake_up(&rsp->gp_wq);
+ swake_up(&rsp->gp_wq);
}

/*
@@ -1779,9 +1779,9 @@ static int __noreturn rcu_gp_kthread(void *arg)
ACCESS_ONCE(rsp->gpnum),
TPS("reqwait"));
rsp->gp_state = RCU_GP_WAIT_GPS;
- wait_event_interruptible(rsp->gp_wq,
- ACCESS_ONCE(rsp->gp_flags) &
- RCU_GP_FLAG_INIT);
+ swait_event_interruptible(rsp->gp_wq,
+ ACCESS_ONCE(rsp->gp_flags) &
+ RCU_GP_FLAG_INIT);
/* Locking provides needed memory barrier. */
if (rcu_gp_init(rsp))
break;
@@ -1807,7 +1807,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
ACCESS_ONCE(rsp->gpnum),
TPS("fqswait"));
rsp->gp_state = RCU_GP_WAIT_FQS;
- ret = wait_event_interruptible_timeout(rsp->gp_wq,
+ ret = swait_event_interruptible_timeout(rsp->gp_wq,
((gf = ACCESS_ONCE(rsp->gp_flags)) &
RCU_GP_FLAG_FQS) ||
(!ACCESS_ONCE(rnp->qsmask) &&
@@ -1928,7 +1928,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
{
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
- wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
+ swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
}

/*
@@ -2507,7 +2507,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
}
ACCESS_ONCE(rsp->gp_flags) |= RCU_GP_FLAG_FQS;
raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
- wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
+ swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
}

/*
@@ -3630,7 +3630,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
}

rsp->rda = rda;
- init_waitqueue_head(&rsp->gp_wq);
+ init_swait_head(&rsp->gp_wq);
rnp = rsp->level[rcu_num_lvls - 1];
for_each_possible_cpu(i) {
while (i > rnp->grphi)
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 6a86eb7bac45..e44c58cf1200 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -208,7 +208,7 @@ struct rcu_node {
/* This can happen due to race conditions. */
#endif /* #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_RCU_NOCB_CPU
- wait_queue_head_t nocb_gp_wq[2];
+ struct swait_head nocb_gp_wq[2];
/* Place for rcu_nocb_kthread() to wait GP. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
int need_future_gp[2];
@@ -348,7 +348,7 @@ struct rcu_data {
atomic_long_t nocb_follower_count_lazy; /* (approximate). */
int nocb_p_count; /* # CBs being invoked by kthread */
int nocb_p_count_lazy; /* (approximate). */
- wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
+ struct swait_head nocb_wq; /* For nocb kthreads to sleep on. */
struct task_struct *nocb_kthread;
bool nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */

@@ -434,7 +434,7 @@ struct rcu_state {
unsigned long gpnum; /* Current gp number. */
unsigned long completed; /* # of last completed gp. */
struct task_struct *gp_kthread; /* Task for grace periods. */
- wait_queue_head_t gp_wq; /* Where GP task waits. */
+ struct swait_head gp_wq; /* Where GP task waits. */
short gp_flags; /* Commands for GP task. */
short gp_state; /* GP kthread sleep state. */

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index a7997e272564..2da4755dc2a8 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2033,7 +2033,7 @@ early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
*/
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
{
- wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
+ swake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
}

/*
@@ -2051,8 +2051,8 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)

static void rcu_init_one_nocb(struct rcu_node *rnp)
{
- init_waitqueue_head(&rnp->nocb_gp_wq[0]);
- init_waitqueue_head(&rnp->nocb_gp_wq[1]);
+ init_swait_head(&rnp->nocb_gp_wq[0]);
+ init_swait_head(&rnp->nocb_gp_wq[1]);
}

#ifndef CONFIG_RCU_NOCB_CPU_ALL
@@ -2077,7 +2077,7 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) {
/* Prior xchg orders against prior callback enqueue. */
ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false;
- wake_up(&rdp_leader->nocb_wq);
+ swake_up(&rdp_leader->nocb_wq);
}
}

@@ -2224,7 +2224,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
*/
trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
for (;;) {
- wait_event_interruptible(
+ swait_event_interruptible(
rnp->nocb_gp_wq[c & 0x1],
(d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
if (likely(d))
@@ -2353,7 +2353,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
if (!rcu_nocb_poll) {
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
"FollowerSleep");
- wait_event_interruptible(rdp->nocb_wq,
+ swait_event_interruptible(rdp->nocb_wq,
ACCESS_ONCE(rdp->nocb_follower_head));
} else if (firsttime) {
/* Don't drown trace log with "Poll"! */
@@ -2455,7 +2455,7 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
{
rdp->nocb_tail = &rdp->nocb_head;
- init_waitqueue_head(&rdp->nocb_wq);
+ init_swait_head(&rdp->nocb_wq);
rdp->nocb_follower_tail = &rdp->nocb_follower_head;
}
--
1.9.2
Paul Gortmaker
2014-10-18 00:23:02 UTC
Permalink
*** not for merge; for discussion only ***

This makes the simple wait code's barrier usage match what was in
use in the -rt version of simplewait support; without this commit,
it instead matches the barrier usage of the existing mainline
complex wait code.

After the previous submission of simple wait for mainline, there was
some discussion between Steve and Peter on IRC about the validity of
the barriers, so I'm putting this here as a standalone change so we
can continue/close that previous discussion.

One of the original -rt additions from Steve can be seen here:
https://lkml.org/lkml/2013/8/19/275

With the delta highlighted here in this standalone change, it
almost seems that the additional barriers used in -rt are a
consequence of -rt using __set_current_state() instead of the
barrier version set_current_state() -- as the complex wait
version has an explicit comment explaining why it needs the
barrier-enabled version of set_current_state() vs. the non-barrier one.

But that is just my guess; the barrier experts need to throw
their $0.02 into this discussion.
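
For reference, a minimal sketch of the ordering the barrier-enabled
set_current_state() gives the existing complex wait code (names like
'cond' and 'wq' are made up, and this is independent of the swait
lock details in question):

static int cond;
static DECLARE_WAIT_QUEUE_HEAD(wq);

static void waiter(void)
{
	DEFINE_WAIT(wait);

	for (;;) {
		/* Queues 'wait' and sets ->state via set_current_state(). */
		prepare_to_wait(&wq, &wait, TASK_UNINTERRUPTIBLE);
		if (cond)	/* this load must not pass the state store */
			break;
		schedule();
	}
	finish_wait(&wq, &wait);
}

static void waker(void)
{
	cond = 1;	/* publish the condition ...             */
	wake_up(&wq);	/* ... then wake whoever is queued on wq */
}

If the waiter used a plain __set_current_state(), its read of 'cond'
could be reordered before the ->state store and it could sleep through
a wakeup that raced with the check; whether the extra smp_mb()s in the
hunks below are what restores that ordering on the swait side is
exactly what I'd like the barrier folks to confirm.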

Cc: Steven Rostedt <***@goodmis.org>
Cc: Peter Zijlstra <***@infradead.org>
Cc: Paul E. McKenney <***@linux.vnet.ibm.com>
Not-signed-off-by: Paul Gortmaker <***@windriver.com>

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 46e2591c22b6..21271b61aec8 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -181,6 +181,8 @@ static inline int cwait_active(struct cwait_head *q)

static inline int swait_active(struct swait_head *q)
{
+ /* Make sure the condition is visible before checking list_empty() */
+ smp_mb();
return !list_empty(&q->task_list);
}

@@ -205,6 +207,8 @@ static inline void __add_cwait(struct cwait_head *head, struct cwait *new)
static inline void __add_swait(struct swait_head *head, struct swait *new)
{
list_add(&new->node, &head->task_list);
+ /* We can't let the condition leak before the setting of head */
+ smp_mb();
}

/*
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 25e5886ed8d9..c0575973d4d4 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -298,7 +298,7 @@ void prepare_to_swait(struct swait_head *q, struct swait *wait, int state)

raw_spin_lock_irqsave(&q->lock, flags);
__prepare_to_swait(q, wait);
- set_current_state(state);
+ __set_current_state(state); /* urk! see cwait barrier note above */
raw_spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_swait);
--
1.9.2