Discussion:
[RFC 0/4] Disable timer tick for SCHED_FIFO tasks
Jan Blunck
2010-09-08 12:29:20 UTC
Permalink
Here are some patches that I have been working on a few months ago.

For some applications (e.g. FTQ) you can see the jitter that is introduced by
the timer tick in the result data set. Therefore the task was to completely
disable the timer tick or at least reduce the impact on the application.

This patchset is reusing the infrastructure that disables the timer tick during
idle periods to disable it as well when SCHED_FIFO tasks are executed.

The results of the FTQ workload show that the timer tick is getting disabled
for long periods, but the impact of a single interruption seems to increase.
Possibly, this is due to the fact that the timer tick needs to be enabled again
during interrupts.

Other known issues:
- Currently the time accounting is totally broken but that was not the primary
focus.

Rants? Thoughts? Complaints?

Jan


Jan Blunck (4):
ftrace: Add events for tracing timer interrupts
ftrace: Add events for tracing tick start and stop
Disable scheduler tick when we are running SCHED_FIFO tasks
ftrace: Add argument to tick start/stop tracing

arch/x86/kernel/apic/apic.c | 4 +++
arch/x86/kernel/time.c | 5 ++++
include/linux/tick.h | 14 +++++++++++-
include/trace/events/irq.h | 38 ++++++++++++++++++++++++++++++++++
include/trace/events/sched.h | 46 ++++++++++++++++++++++++++++++++++++++++++
kernel/sched_rt.c | 23 +++++++++++++++++++++
kernel/softirq.c | 5 ++++
kernel/time/tick-sched.c | 40 ++++++++++++++++++++++++-----------
8 files changed, 160 insertions(+), 15 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Jan Blunck
2010-09-08 12:29:21 UTC
Permalink
Trace the execution of the timer interrupt. I did not find an argument that
makes any sense so traces look like this:

<idle>-0 [000] 181019.693546: timerirq_enter: unused=1
<idle>-0 [000] 181019.693553: timerirq_exit: unused=1

Signed-off-by: Jan Blunck <***@suse.de>
---
arch/x86/kernel/apic/apic.c | 4 ++++
arch/x86/kernel/time.c | 5 +++++
include/trace/events/irq.h | 38 ++++++++++++++++++++++++++++++++++++++
3 files changed, 47 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a96489e..edd775b 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -35,6 +35,8 @@
#include <linux/smp.h>
#include <linux/mm.h>

+#include <trace/events/irq.h>
+
#include <asm/perf_event.h>
#include <asm/x86_init.h>
#include <asm/pgalloc.h>
@@ -823,7 +825,9 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
*/
exit_idle();
irq_enter();
+ trace_timerirq_enter(1);
local_apic_timer_interrupt();
+ trace_timerirq_exit(1);
irq_exit();

set_irq_regs(old_regs);
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index fb5cc5e..01c2395 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -14,6 +14,8 @@
#include <linux/time.h>
#include <linux/mca.h>

+#include <trace/events/irq.h>
+
#include <asm/vsyscall.h>
#include <asm/x86_init.h>
#include <asm/i8259.h>
@@ -63,6 +65,8 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
/* Keep nmi watchdog up to date */
inc_irq_stat(irq0_irqs);

+ trace_timerirq_enter(0);
+
/* Optimized out for !IO_APIC and x86_64 */
if (timer_ack) {
/*
@@ -83,6 +87,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
if (MCA_bus)
outb_p(inb_p(0x61)| 0x80, 0x61);

+ trace_timerirq_exit(0);
return IRQ_HANDLED;
}

diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 0e4cfb6..c638ab0 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -136,6 +136,44 @@ DEFINE_EVENT(softirq, softirq_exit,
TP_ARGS(h, vec)
);

+/**
+ * timerirq_enter
+ */
+TRACE_EVENT(timerirq_enter,
+
+ TP_PROTO(int unused),
+
+ TP_ARGS(unused),
+
+ TP_STRUCT__entry(
+ __field(u64, unused)
+ ),
+
+ TP_fast_assign(
+ __entry->unused = unused;
+ ),
+ TP_printk("unused=%lu", (unsigned long)__entry->unused)
+);
+
+/**
+ * timerirq_exit
+ */
+TRACE_EVENT(timerirq_exit,
+
+ TP_PROTO(int unused),
+
+ TP_ARGS(unused),
+
+ TP_STRUCT__entry(
+ __field(u64, unused)
+ ),
+
+ TP_fast_assign(
+ __entry->unused = unused;
+ ),
+ TP_printk("unused=%lu", (unsigned long)__entry->unused)
+);
+
#endif /* _TRACE_IRQ_H */

/* This part must be outside protection */
--
1.6.4.2

--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Peter Zijlstra
2010-09-08 12:38:23 UTC
Permalink
Post by Jan Blunck
Trace the execution of the timer interrupt. I did not find an argument that
<idle>-0 [000] 181019.693546: timerirq_enter: unused=1
<idle>-0 [000] 181019.693553: timerirq_exit: unused=1
So what's wrong with the normal IRQ enter/exit tracepoints?
--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Andi Kleen
2010-09-08 13:24:01 UTC
Permalink
Post by Jan Blunck
Trace the execution of the timer interrupt. I did not find an argument that
<idle>-0 [000] 181019.693546: timerirq_enter: unused=1
<idle>-0 [000] 181019.693553: timerirq_exit: unused=1
I think you can get the same by just filtering the normal
irq entry for the timer irq number.

-Andi
--
***@linux.intel.com -- Speaking for myself only.
--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Jan Blunck
2010-09-08 12:29:22 UTC
Permalink
Trace the starting and stopping of the scheduler tick. The traces look like
this:

<idle>-0 [000] 187573.495750: sched_tick_stop: idle=1, expires=6876704000000
<idle>-0 [000] 187573.629998: sched_tick_stop: idle=0, expires=6876704000000
<idle>-0 [000] 187573.764273: sched_tick_stop: idle=0, expires=6876704000000
<idle>-0 [000] 187573.898508: sched_tick_stop: idle=0, expires=6876704000000
<idle>-0 [000] 187574.009284: sched_tick_start: now=6876232761127

In this trace you can see how tick_nohz_stop_sched_tick() is called from
the idle thread (idle=1) and later through irq_exit() (idle=0).

Signed-off-by: Jan Blunck <***@suse.de>
---
include/trace/events/sched.h | 40 ++++++++++++++++++++++++++++++++++++++++
kernel/time/tick-sched.c | 21 ++++++++++++++++-----
2 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index b9e1dd6..36385b6 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -380,6 +380,46 @@ TRACE_EVENT(sched_stat_runtime,
(unsigned long long)__entry->vruntime)
);

+TRACE_EVENT(sched_tick_start,
+
+ TP_PROTO(ktime_t *now),
+
+ TP_ARGS(now),
+
+ TP_STRUCT__entry(
+ __field(s64, tv64)
+ ),
+
+ TP_fast_assign(
+ __entry->tv64 = now->tv64;
+ ),
+ TP_printk("now=%ld",
+ (long)__entry->tv64)
+
+);
+
+TRACE_EVENT(sched_tick_stop,
+
+ TP_PROTO(ktime_t *expires, int idle),
+
+ TP_ARGS(expires, idle),
+
+ TP_STRUCT__entry(
+ __field(s64, tv64)
+ __field(int, idle)
+ ),
+
+ TP_fast_assign(
+ __entry->tv64 = expires->tv64;
+ __entry->idle = idle;
+ ),
+ TP_printk("idle=%d, expires=%ld%s",
+ __entry->idle,
+ (long)__entry->tv64,
+ (__entry->tv64 == KTIME_MAX) ? " (KTIME_MAX)" : "" )
+
+);
+
#endif /* _TRACE_SCHED_H */

/* This part must be outside protection */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 813993b..81b7398 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -22,6 +22,8 @@
#include <linux/tick.h>
#include <linux/module.h>

+#include <trace/events/sched.h>
+
#include <asm/irq_regs.h>

#include "tick-internal.h"
@@ -428,9 +430,11 @@ void tick_nohz_stop_sched_tick(int inidle)
* If the expiration time == KTIME_MAX, then
* in this case we simply stop the tick timer.
*/
- if (unlikely(expires.tv64 == KTIME_MAX)) {
- if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
+ if (unlikely(expires.tv64 == KTIME_MAX)) {
+ if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+ trace_sched_tick_stop(&expires, inidle);
hrtimer_cancel(&ts->sched_timer);
+ }
goto out;
}

@@ -438,10 +442,15 @@ void tick_nohz_stop_sched_tick(int inidle)
hrtimer_start(&ts->sched_timer, expires,
HRTIMER_MODE_ABS_PINNED);
/* Check, if the timer was already in the past */
- if (hrtimer_active(&ts->sched_timer))
- goto out;
- } else if (!tick_program_event(expires, 0))
+ if (hrtimer_active(&ts->sched_timer)) {
+ trace_sched_tick_stop(&expires, inidle);
goto out;
+ }
+ } else if (!tick_program_event(expires, 0)) {
+ trace_sched_tick_stop(&expires, inidle);
+ goto out;
+ }
+
/*
* We are past the event already. So we crossed a
* jiffie boundary. Update jiffies and raise the
@@ -495,6 +504,8 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
tick_do_update_jiffies64(now);
now = ktime_get();
}
+
+ trace_sched_tick_start(&now);
}

/**
--
1.6.4.2
Jan Blunck
2010-09-08 12:29:23 UTC
Permalink
This patch is disabling the scheduler tick to go off when there is a task
with SCHED_FIFO policy running. Since these tasks are not timesliced anyway
we only care about timers, softirqs and such stuff just like when we disable
the tick during idle periods.

Signed-off-by: Jan Blunck <***@suse.de>
---
kernel/sched_rt.c | 23 +++++++++++++++++++++++
kernel/softirq.c | 5 +++++
kernel/time/tick-sched.c | 7 ++++---
3 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 8afb953..3879ca1 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1095,6 +1095,13 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
rq->post_schedule = has_pushable_tasks(rq);
#endif

+ if (p) {
+ /* Disable sched_tick in post_schedule later */
+ if (unlikely(rt_task(p)) && !(p->flags & PF_KTHREAD) &&
+ (p->policy == SCHED_FIFO))
+ rq->post_schedule = 1;
+ }
+
return p;
}

@@ -1475,11 +1482,27 @@ static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
/* Try to pull RT tasks here if we lower this rq's prio */
if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio)
pull_rt_task(rq);
+
+ /* Enable sched_tick again before we schedule */
+ if (unlikely(rt_task(prev)) && !(prev->flags & PF_KTHREAD) &&
+ (prev->policy == SCHED_FIFO)) {
+ tick_nohz_restart_sched_tick();
+
+ /* Disable tick in post_schedule if we don't switch */
+ rq->post_schedule = 1;
+ }
}

static void post_schedule_rt(struct rq *rq)
{
push_rt_tasks(rq);
+
+ /* Disable tick if we are a FIFO task */
+ if (unlikely(rt_task(rq->curr)) &&
+ unlikely(!local_softirq_pending()) &&
+ !(rq->curr->flags & PF_KTHREAD) &&
+ (rq->curr->policy == SCHED_FIFO))
+ tick_nohz_stop_sched_tick(1);
}

/*
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 07b4f1b..ff05f6a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -307,6 +307,11 @@ void irq_exit(void)
/* Make sure that timer wheel updates are propagated */
if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
tick_nohz_stop_sched_tick(0);
+
+ /* Disable tick if the current task is FIFO */
+ if (unlikely(rt_task(current) && !(current->flags & PF_KTHREAD) &&
+ current->policy == SCHED_FIFO))
+ tick_nohz_stop_sched_tick(1);
#endif
preempt_enable_no_resched();
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 81b7398..567110d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -520,9 +520,10 @@ void tick_nohz_restart_sched_tick(void)
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
unsigned long ticks;
#endif
+ unsigned long flags;
ktime_t now;

- local_irq_disable();
+ local_irq_save(flags);
if (ts->idle_active || (ts->inidle && ts->tick_stopped))
now = ktime_get();

@@ -531,7 +532,7 @@ void tick_nohz_restart_sched_tick(void)

if (!ts->inidle || !ts->tick_stopped) {
ts->inidle = 0;
- local_irq_enable();
+ local_irq_restore(flags);
return;
}

@@ -567,7 +568,7 @@ void tick_nohz_restart_sched_tick(void)

tick_nohz_restart(ts, now);

- local_irq_enable();
+ local_irq_restore(flags);
}

static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
--
1.6.4.2

--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Peter Zijlstra
2010-09-08 12:43:34 UTC
Permalink
Post by Jan Blunck
This patch is disabling the scheduler tick to go off when there is a task
with SCHED_FIFO policy running. Since these tasks are not timesliced anyway
we only care about timers, softirqs and such stuff just like when we disable
the tick during idle periods.
Why only FIFO and not also nr_running == 1?
--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Jan Blunck
2010-09-08 14:32:19 UTC
Permalink
Post by Peter Zijlstra
Post by Jan Blunck
This patch is disabling the scheduler tick to go off when there is a task
with SCHED_FIFO policy running. Since these tasks are not timesliced anyway
we only care about timers, softirqs and such stuff just like when we disable
the tick during idle periods.
Why only FIFO and not also nr_running == 1?
Lazy Bastard Syndrome
--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Peter Zijlstra
2010-09-08 12:45:43 UTC
Permalink
Post by Jan Blunck
This patch is disabling the scheduler tick to go off when there is a task
with SCHED_FIFO policy running. Since these tasks are not timesliced anyway
we only care about timers, softirqs and such stuff just like when we disable
the tick during idle periods.
Also, doesn't this break any and all jiffies users?

And you need to restart the tick on call_rcu() and everything else that
requires the tick for processing.

--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Thomas Gleixner
2010-09-08 13:21:44 UTC
Permalink
Post by Peter Zijlstra
Post by Jan Blunck
This patch is disabling the scheduler tick to go off when there is a task
with SCHED_FIFO policy running. Since these tasks are not timesliced anyway
we only care about timers, softirqs and such stuff just like when we disable
the tick during idle periods.
Also, doesn't this break any and all jiffies users?
Only on UP. On SMP we hand off the do_timer duty to some other core.
Post by Peter Zijlstra
And you need to restart the tick on call_rcu() and everything else that
requires the tick for processing.
Not only this. If the task enqueues a timer_list timer via a syscall
or an interrupt/softirq enqueues a timer_list timer while the tick is
off then this timer will not fire until the task goes back into
schedule.

That approach is way too naive. There is a boatload of subtle wreckage
waiting and it needs careful analysis of this to work. Frederic is
looking into this already.

Thanks,

tglx
--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Jan Blunck
2010-09-08 14:28:26 UTC
Permalink
Post by Peter Zijlstra
Post by Jan Blunck
This patch is disabling the scheduler tick to go off when there is a task
with SCHED_FIFO policy running. Since these tasks are not timesliced anyway
we only care about timers, softirqs and such stuff just like when we disable
the tick during idle periods.
Also, doesn't this break any and all jiffies users?
Sure. It was enough to make FTQ run.
Post by Peter Zijlstra
And you need to restart the tick on call_rcu() and everything else that
requires the tick for processing.
I realized that before as well but somehow forgot about it. Checking for a
disabled timer tick on syscall entry would help. Like we do on irq entry as
well.

Jan
Andi Kleen
2010-09-08 17:11:53 UTC
Permalink
Post by Jan Blunck
Post by Peter Zijlstra
Post by Jan Blunck
This patch is disabling the scheduler tick to go off when there is a task
with SCHED_FIFO policy running. Since these tasks are not timesliced anyway
we only care about timers, softirqs and such stuff just like when we disable
the tick during idle periods.
Also, doesn't this break any and all jiffies users?
Sure. It was enough to make FTQ run.
Post by Peter Zijlstra
And you need to restart the tick on call_rcu() and everything else that
requires the tick for processing.
I realized that before as well but somehow forgot about it. Checking for a
disabled timer tick on syscall entry would help. Like we do on irq entry as
well.
That doesn't help the gettimeofday/clock_gettime vDSOs
which access jiffie-like state in user space.

I guess you could force the vDSO to call into the kernel
during such a region, but you cannot force it for a
process that is already context switched in this path
when the option is first enabled.

I guess it would work if you force all the vDSOs
to always do a system call at boot, but that's pretty
costly.

-Andi
--
***@linux.intel.com -- Speaking for myself only.
--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Andi Kleen
2010-09-08 13:25:13 UTC
Permalink
Post by Jan Blunck
This patch is disabling the scheduler tick to go off when there is a task
with SCHED_FIFO policy running. Since these tasks are not timesliced anyway
we only care about timers, softirqs and such stuff just like when we disable
the tick during idle periods.
How would these tasks accounted for then?

The CPU time accounting relies on the timer tick currently.

-Andi
--
***@linux.intel.com -- Speaking for myself only.
--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Jan Blunck
2010-09-08 14:47:08 UTC
Permalink
Post by Andi Kleen
Post by Jan Blunck
This patch is disabling the scheduler tick to go off when there is a task
with SCHED_FIFO policy running. Since these tasks are not timesliced anyway
we only care about timers, softirqs and such stuff just like when we disable
the tick during idle periods.
How would these tasks accounted for then?
The CPU time accounting relies on the timer tick currently.
Yes, that is a known issue.

Jan
Post by Andi Kleen
-Andi
--
--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Jan Blunck
2010-09-08 12:29:24 UTC
Permalink
With this patch it is possible to differentiate the idle tick stop from
the SCHED_FIFO tick stop.

Signed-off-by: Jan Blunck <***@suse.de>
---
include/linux/tick.h | 14 ++++++++++++--
include/trace/events/sched.h | 22 ++++++++++++++--------
kernel/sched_rt.c | 4 ++--
kernel/softirq.c | 2 +-
kernel/time/tick-sched.c | 20 +++++++++++---------
5 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index b232ccc..37b3d78 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -121,13 +121,23 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */

# ifdef CONFIG_NO_HZ
-extern void tick_nohz_stop_sched_tick(int inidle);
-extern void tick_nohz_restart_sched_tick(void);
+extern void __tick_nohz_stop_sched_tick(int inidle, int insched);
+static inline void tick_nohz_stop_sched_tick(int inidle)
+{
+ __tick_nohz_stop_sched_tick(inidle, 0);
+}
+extern void __tick_nohz_restart_sched_tick(int insched);
+static inline void tick_nohz_restart_sched_tick(void)
+{
+ __tick_nohz_restart_sched_tick(0);
+}
extern ktime_t tick_nohz_get_sleep_length(void);
extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
# else
+static inline void __tick_nohz_stop_sched_tick(int inidle, int insched) { }
static inline void tick_nohz_stop_sched_tick(int inidle) { }
+static inline void __tick_nohz_restart_sched_tick(int insched) { }
static inline void tick_nohz_restart_sched_tick(void) { }
static inline ktime_t tick_nohz_get_sleep_length(void)
{
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 36385b6..fd4307e 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -382,41 +382,47 @@ TRACE_EVENT(sched_stat_runtime,

TRACE_EVENT(sched_tick_start,

- TP_PROTO(ktime_t *now),
+ TP_PROTO(ktime_t *now, int sched_rt),

- TP_ARGS(now),
+ TP_ARGS(now, sched_rt),

TP_STRUCT__entry(
__field(s64, tv64)
+ __field(int, sched_rt)
),

TP_fast_assign(
__entry->tv64 = now->tv64;
+ __entry->sched_rt = sched_rt;
),
- TP_printk("now=%ld",
- (long)__entry->tv64)
+ TP_printk("now=%ld%s",
+ (long)__entry->tv64,
+ __entry->sched_rt ? ", SCHED_FIFO" : "")

);

TRACE_EVENT(sched_tick_stop,

- TP_PROTO(ktime_t *expires, int idle),
+ TP_PROTO(ktime_t *expires, int idle, int sched_rt),

- TP_ARGS(expires, idle),
+ TP_ARGS(expires, idle, sched_rt),

TP_STRUCT__entry(
__field(s64, tv64)
__field(int, idle)
+ __field(int, sched_rt)
),

TP_fast_assign(
__entry->tv64 = expires->tv64;
__entry->idle = idle;
+ __entry->sched_rt = sched_rt;
),
- TP_printk("idle=%d, expires=%ld%s",
+ TP_printk("idle=%d, expires=%ld%s%s",
__entry->idle,
(long)__entry->tv64,
- (__entry->tv64 == KTIME_MAX) ? " (KTIME_MAX)" : "" )
+ (__entry->tv64 == KTIME_MAX) ? " (KTIME_MAX)" : "",
+ __entry->sched_rt ? ", SCHED_FIFO" : "" )

);

diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 3879ca1..c9386da 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1486,7 +1486,7 @@ static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
/* Enable sched_tick again before we schedule */
if (unlikely(rt_task(prev)) && !(prev->flags & PF_KTHREAD) &&
(prev->policy == SCHED_FIFO)) {
- tick_nohz_restart_sched_tick();
+ __tick_nohz_restart_sched_tick(1);

/* Disable tick in post_schedule if we don't switch */
rq->post_schedule = 1;
@@ -1502,7 +1502,7 @@ static void post_schedule_rt(struct rq *rq)
unlikely(!local_softirq_pending()) &&
!(rq->curr->flags & PF_KTHREAD) &&
(rq->curr->policy == SCHED_FIFO))
- tick_nohz_stop_sched_tick(1);
+ __tick_nohz_stop_sched_tick(1, 1);
}

/*
diff --git a/kernel/softirq.c b/kernel/softirq.c
index ff05f6a..f0973be 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -311,7 +311,7 @@ void irq_exit(void)
/* Disable tick if the current task is FIFO */
if (unlikely(rt_task(current) && !(current->flags & PF_KTHREAD) &&
current->policy == SCHED_FIFO))
- tick_nohz_stop_sched_tick(1);
+ __tick_nohz_stop_sched_tick(1, 1);
#endif
preempt_enable_no_resched();
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 567110d..2a3bd0a 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -257,7 +257,7 @@ EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
* Called either from the idle loop or from irq_exit() when an idle period was
* just interrupted by an interrupt which did not cause a reschedule.
*/
-void tick_nohz_stop_sched_tick(int inidle)
+void __tick_nohz_stop_sched_tick(int inidle, int insched)
{
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
struct tick_sched *ts;
@@ -432,7 +432,8 @@ void tick_nohz_stop_sched_tick(int inidle)
*/
if (unlikely(expires.tv64 == KTIME_MAX)) {
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
- trace_sched_tick_stop(&expires, inidle);
+ trace_sched_tick_stop(&expires, inidle,
+ insched);
hrtimer_cancel(&ts->sched_timer);
}
goto out;
@@ -443,11 +444,12 @@ void tick_nohz_stop_sched_tick(int inidle)
HRTIMER_MODE_ABS_PINNED);
/* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer)) {
- trace_sched_tick_stop(&expires, inidle);
+ trace_sched_tick_stop(&expires, inidle,
+ insched);
goto out;
}
} else if (!tick_program_event(expires, 0)) {
- trace_sched_tick_stop(&expires, inidle);
+ trace_sched_tick_stop(&expires, inidle, insched);
goto out;
}

@@ -480,7 +482,7 @@ ktime_t tick_nohz_get_sleep_length(void)
return ts->sleep_length;
}

-static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
+static void tick_nohz_restart(struct tick_sched *ts, ktime_t now, int insched)
{
hrtimer_cancel(&ts->sched_timer);
hrtimer_set_expires(&ts->sched_timer, ts->idle_tick);
@@ -505,7 +507,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
now = ktime_get();
}

- trace_sched_tick_start(&now);
+ trace_sched_tick_start(&now, insched);
}

/**
@@ -513,7 +515,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
*
* Restart the idle tick when the CPU is woken up from idle
*/
-void tick_nohz_restart_sched_tick(void)
+void __tick_nohz_restart_sched_tick(int insched)
{
int cpu = smp_processor_id();
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
@@ -566,7 +568,7 @@ void tick_nohz_restart_sched_tick(void)
ts->tick_stopped = 0;
ts->idle_exittime = now;

- tick_nohz_restart(ts, now);
+ tick_nohz_restart(ts, now, insched);

local_irq_restore(flags);
}
@@ -691,7 +693,7 @@ static void tick_nohz_kick_tick(int cpu, ktime_t now)
if (delta.tv64 <= tick_period.tv64)
return;

- tick_nohz_restart(ts, now);
+ tick_nohz_restart(ts, now, 0);
#endif
}
--
1.6.4.2
Loading...