Commit e3cf2a01 authored by Philippe Gerum

evl: introduce runqueue lock



This is the last step to get rid of the ugly lock, which was still
required for serializing accesses to the runqueue information.
Signed-off-by: Philippe Gerum <rpm@xenomai.org>
parent d68fbb82
......@@ -36,10 +36,18 @@
#ifdef CONFIG_SMP
#define assert_hard_lock(__lock) EVL_WARN_ON_ONCE(CORE, \
!(raw_spin_is_locked(__lock) && hard_irqs_disabled()))
#define assert_evl_lock(__lock) EVL_WARN_ON_ONCE(CORE, \
!(raw_spin_is_locked(&(__lock)->_lock) && oob_irqs_disabled()))
#else
#define assert_hard_lock(__lock) EVL_WARN_ON_ONCE(CORE, !hard_irqs_disabled())
#define assert_evl_lock(__lock) EVL_WARN_ON_ONCE(CORE, !oob_irqs_disabled())
#endif
#define assert_evl_lock(__lock) assert_hard_lock(&(__lock)->_lock)
#define assert_thread_pinned(__thread) \
do { \
assert_evl_lock(&(__thread)->lock); \
assert_evl_lock(&(__thread)->rq->lock); \
} while (0)
/* TEMP: needed until we have gotten rid of the infamous nklock. */
#ifdef CONFIG_SMP
......
......@@ -9,20 +9,20 @@
#include <evl/sched.h>
/* hard IRQs off. */
/* hard irqs off. */
static inline void evl_enter_irq(void)
{
struct evl_rq *rq = this_evl_rq();
rq->lflags |= RQ_IRQ;
rq->local_flags |= RQ_IRQ;
}
/* hard IRQs off. */
/* hard irqs off. */
static inline void evl_exit_irq(void)
{
struct evl_rq *rq = this_evl_rq();
struct evl_rq *this_rq = this_evl_rq();
rq->lflags &= ~RQ_IRQ;
this_rq->local_flags &= ~RQ_IRQ;
/*
* We are only interested in RQ_SCHED previously set by an OOB
......@@ -33,7 +33,7 @@ static inline void evl_exit_irq(void)
* CAUTION: Switching stages as a result of rescheduling may
* re-enable irqs, shut them off before returning if so.
*/
if (rq->status & RQ_SCHED) {
if (evl_need_resched(this_rq)) {
evl_schedule();
if (!hard_irqs_disabled())
hard_local_irq_disable();
......
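A sketch of how the RQ_IRQ bracketing above is meant to be used (hypothetical handler name, not part of this patch): while RQ_IRQ is set in local_flags, evl_schedule() backs off, and the deferred rescheduling is performed on IRQ exit.

/* Sketch only: bracketing an out-of-band interrupt with the helpers above. */
static void handle_oob_irq_sketch(void)	/* hypothetical */
{
	evl_enter_irq();	/* raises RQ_IRQ in this rq's local_flags */

	/*
	 * Run the out-of-band handler; waking up threads here may set
	 * RQ_SCHED, but evl_schedule() is a no-op while RQ_IRQ is set.
	 */

	evl_exit_irq();		/* drops RQ_IRQ, then reschedules if needed */
}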
......@@ -19,19 +19,18 @@
#include <evl/assert.h>
#include <evl/init.h>
/** Shared scheduler status bits **/
/*
* A rescheduling call is pending.
* Shared rq flags bits.
*
* A rescheduling operation is pending. May also be present in the
* private flags.
*/
#define RQ_SCHED 0x10000000
/**
* Private scheduler flags (combined in test operations with shared
* bits, must not conflict with them).
*/
/*
* Private rq flags (combined with the shared bits in test operations
* by evl_schedule(); beware of conflicts).
*
* Currently running in tick handler context.
*/
#define RQ_TIMER 0x00010000
......@@ -45,14 +44,13 @@
*/
#define RQ_IRQ 0x00004000
/*
* Proxy tick is deferred, because we have more urgent real-time
* duties to carry out first.
* Proxy tick is deferred, because we have more urgent out-of-band
* work to carry out first.
*/
#define RQ_TDEFER 0x00002000
/*
* Idle state: there is no outstanding timer. We check this flag to
* know whether we may allow the regular kernel to enter the CPU idle
* state.
* know whether we may allow inband to enter the CPU idle state.
*/
#define RQ_IDLE 0x00001000
/*
......@@ -65,13 +63,13 @@ struct evl_sched_fifo {
};
struct evl_rq {
unsigned long status; /* Shared flags */
unsigned long lflags; /* Private flags (lockless) */
evl_spinlock_t lock;
/*
* Shared data, covered by ->lock.
*/
unsigned long flags;
struct evl_thread *curr;
#ifdef CONFIG_SMP
int cpu;
struct cpumask resched; /* CPUs pending resched */
#endif
struct evl_sched_fifo fifo;
struct evl_sched_weak weak;
#ifdef CONFIG_EVL_SCHED_QUOTA
......@@ -80,18 +78,28 @@ struct evl_rq {
#ifdef CONFIG_EVL_SCHED_TP
struct evl_sched_tp tp;
#endif
struct evl_timer inband_timer;
struct evl_timer rrbtimer; /* Round-robin */
struct evl_thread root_thread;
char *proxy_timer_name;
char *rrb_timer_name;
#ifdef CONFIG_EVL_WATCHDOG
struct evl_timer wdtimer;
#endif
#ifdef CONFIG_EVL_RUNSTATS
ktime_t last_account_switch;
struct evl_account *current_account;
#endif
/*
* Runqueue-local data the owner may modify locklessly.
*/
unsigned long local_flags;
#ifdef CONFIG_SMP
int cpu;
struct cpumask resched_cpus;
#endif
struct evl_timer inband_timer;
struct evl_timer rrbtimer;
#ifdef CONFIG_EVL_WATCHDOG
struct evl_timer wdtimer;
#endif
/* Misc stuff. */
char *proxy_timer_name;
char *rrb_timer_name;
};
DECLARE_PER_CPU(struct evl_rq, evl_runqueues);
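The reorganized layout splits struct evl_rq into rq->lock-covered shared fields and runqueue-local fields only the owner CPU touches. A minimal sketch of the resulting access rules (hypothetical helper, not part of the patch):

static bool rq_access_rules_sketch(struct evl_rq *rq)	/* hypothetical */
{
	unsigned long flags;
	bool resched, in_irq = false;

	/* Shared state (->flags, ->curr, ...): covered by rq->lock. */
	evl_spin_lock_irqsave(&rq->lock, flags);
	resched = evl_need_resched(rq);
	evl_spin_unlock_irqrestore(&rq->lock, flags);

	/*
	 * Runqueue-local state (->local_flags, ...): lockless, but only
	 * the CPU owning @rq may access it, with hard irqs off.
	 */
	if (rq == this_evl_rq())
		in_irq = !!(rq->local_flags & RQ_IRQ);

	return resched || in_irq;
}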
......@@ -187,17 +195,17 @@ static inline struct evl_thread *this_evl_rq_thread(void)
/* Test resched flag of given rq. */
static inline int evl_need_resched(struct evl_rq *rq)
{
return rq->status & RQ_SCHED;
return rq->flags & RQ_SCHED;
}
/* Set self resched flag for the current scheduler. */
/* Set resched flag for the current rq. */
static inline void evl_set_self_resched(struct evl_rq *rq)
{
requires_ugly_lock();
rq->status |= RQ_SCHED;
assert_evl_lock(&rq->lock);
rq->flags |= RQ_SCHED;
}
/* Set resched flag for the given scheduler. */
/* Set resched flag for the given rq. */
#ifdef CONFIG_SMP
static inline bool is_evl_cpu(int cpu)
......@@ -214,12 +222,25 @@ static inline void evl_set_resched(struct evl_rq *rq)
{
struct evl_rq *this_rq = this_evl_rq();
if (this_rq == rq)
this_rq->status |= RQ_SCHED;
else if (!evl_need_resched(rq)) {
cpumask_set_cpu(evl_rq_cpu(rq), &this_rq->resched);
rq->status |= RQ_SCHED;
this_rq->status |= RQ_SCHED;
assert_evl_lock(&rq->lock); /* Implies oob is stalled. */
if (this_rq == rq) {
this_rq->flags |= RQ_SCHED;
} else if (!evl_need_resched(rq)) {
rq->flags |= RQ_SCHED;
/*
* The following updates change CPU-local data and oob
* is stalled on the current CPU, so this is safe even
* though we don't hold this_rq->lock.
*
* NOTE: raising RQ_SCHED in the local_flags too
* ensures that the current CPU will pass through
* evl_schedule() to __evl_schedule() at the next
* opportunity, in order to send the resched IPIs
* (see test_resched()).
*/
this_rq->local_flags |= RQ_SCHED;
cpumask_set_cpu(evl_rq_cpu(rq), &this_rq->resched_cpus);
}
}
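The NOTE above refers to test_resched(), which this hunk does not show. A rough sketch of what such a helper does with resched_cpus and the local RQ_SCHED bit, assuming the caller holds this_rq->lock (the IPI primitive is deliberately left out; names and details here are illustrative, not taken from the patch):

/* Illustrative sketch of a test_resched()-style helper. */
static inline bool test_resched_sketch(struct evl_rq *this_rq)
{
	bool need_resched = evl_need_resched(this_rq);

#ifdef CONFIG_SMP
	/* Kick the remote CPUs flagged by evl_set_resched(). */
	if (!cpumask_empty(&this_rq->resched_cpus)) {
		/* ...send the out-of-band reschedule IPI to resched_cpus... */
		cpumask_clear(&this_rq->resched_cpus);
		this_rq->local_flags &= ~RQ_SCHED;
	}
#endif
	if (need_resched)
		this_rq->flags &= ~RQ_SCHED;

	return need_resched;
}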
......@@ -228,6 +249,9 @@ static inline bool is_threading_cpu(int cpu)
return !!cpumask_test_cpu(cpu, &evl_cpu_affinity);
}
void evl_migrate_thread(struct evl_thread *thread,
struct evl_rq *dst_rq);
#else /* !CONFIG_SMP */
static inline bool is_evl_cpu(int cpu)
......@@ -250,6 +274,11 @@ static inline bool is_threading_cpu(int cpu)
return true;
}
static inline
void evl_migrate_thread(struct evl_thread *thread,
struct evl_rq *dst_rq)
{ }
#endif /* !CONFIG_SMP */
#define for_each_evl_cpu(cpu) \
......@@ -263,11 +292,11 @@ static inline void evl_schedule(void)
struct evl_rq *this_rq = this_evl_rq();
/*
* If we race here reading the scheduler state locklessly
* because of a CPU migration, we must be running over the
* in-band stage, in which case the call to __evl_schedule()
* will be escalated to the oob stage where migration cannot
* happen, ensuring safe access to the runqueue state.
* If we race here reading the rq state locklessly because of
* a CPU migration, we must be running over the in-band stage,
* in which case the call to __evl_schedule() will be
* escalated to the oob stage where migration cannot happen,
* ensuring safe access to the runqueue state.
*
* Remote RQ_SCHED requests are paired with out-of-band IPIs
* running on the oob stage by definition, so we can't miss
......@@ -276,7 +305,7 @@ static inline void evl_schedule(void)
* Finally, RQ_IRQ is always tested from the CPU which handled
* an out-of-band interrupt, so there is no coherence issue.
*/
if (((this_rq->status|this_rq->lflags) & (RQ_IRQ|RQ_SCHED)) != RQ_SCHED)
if (((this_rq->flags|this_rq->local_flags) & (RQ_IRQ|RQ_SCHED)) != RQ_SCHED)
return;
if (likely(running_oob())) {
......@@ -287,6 +316,10 @@ static inline void evl_schedule(void)
run_oob_call((int (*)(void *))__evl_schedule, NULL);
}
int evl_switch_oob(void);
void evl_switch_inband(int cause);
static inline int evl_preempt_count(void)
{
return dovetail_current_state()->preempt_count;
......@@ -300,7 +333,7 @@ static inline void __evl_disable_preempt(void)
static inline void __evl_enable_preempt(void)
{
if (--dovetail_current_state()->preempt_count == 0 &&
!hard_irqs_disabled())
!oob_irqs_disabled())
evl_schedule();
}
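For context, a sketch of how these low-level helpers pair up in a caller; evl_disable_preempt()/evl_enable_preempt() are the public wrappers the next hunk refers to (hypothetical function, not part of the patch):

/* Sketch: a non-preemptible section using the public wrappers. */
static void preempt_section_sketch(void)	/* hypothetical */
{
	evl_disable_preempt();
	/*
	 * Rescheduling requests raised in this section (RQ_SCHED)
	 * stay pending until preemption is re-enabled.
	 */
	evl_enable_preempt();	/* may call evl_schedule() when count drops to 0 */
}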
......@@ -325,7 +358,7 @@ static inline void evl_enable_preempt(void)
static inline bool evl_in_irq(void)
{
return !!(this_evl_rq()->lflags & RQ_IRQ);
return !!(this_evl_rq()->local_flags & RQ_IRQ);
}
static inline bool evl_is_inband(void)
......@@ -338,6 +371,21 @@ static inline bool evl_cannot_block(void)
return evl_in_irq() || evl_is_inband();
}
#define evl_get_thread_rq(__thread, __flags) \
({ \
struct evl_rq *__rq; \
evl_spin_lock_irqsave(&(__thread)->lock, __flags); \
__rq = (__thread)->rq; \
evl_spin_lock(&__rq->lock); \
__rq; \
})
#define evl_put_thread_rq(__thread, __rq, __flags) \
do { \
evl_spin_unlock(&(__rq)->lock); \
evl_spin_unlock_irqrestore(&(__thread)->lock, __flags); \
} while (0)
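A usage sketch for the two helpers above (hypothetical caller): they pin the thread to its current runqueue by taking thread->lock then rq->lock, which is exactly the invariant assert_thread_pinned() checks.

/* Sketch: typical use of evl_get_thread_rq()/evl_put_thread_rq(). */
static void pinned_section_sketch(struct evl_thread *thread)	/* hypothetical */
{
	struct evl_rq *rq;
	unsigned long flags;

	rq = evl_get_thread_rq(thread, flags);
	/* thread->lock and rq->lock are held: @thread cannot migrate. */
	assert_thread_pinned(thread);
	evl_put_thread_rq(thread, rq, flags);
}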
bool evl_set_effective_thread_priority(struct evl_thread *thread,
int prio);
......@@ -360,9 +408,6 @@ void evl_track_thread_policy(struct evl_thread *thread,
void evl_protect_thread_priority(struct evl_thread *thread,
int prio);
void evl_migrate_rq(struct evl_thread *thread,
struct evl_rq *rq);
static inline
void evl_rotate_rq(struct evl_rq *rq,
struct evl_sched_class *sched_class,
......@@ -391,19 +436,18 @@ static inline int evl_init_rq_thread(struct evl_thread *thread)
return ret;
}
/* nklock held, irqs off */
/* rq->lock held, irqs off */
static inline void evl_sched_tick(struct evl_rq *rq)
{
struct evl_thread *curr = rq->curr;
struct evl_sched_class *sched_class = curr->sched_class;
requires_ugly_lock();
assert_evl_lock(&rq->lock);
/*
* A thread that undergoes round-robin scheduling only
* consumes its time slice when it runs within its own
* scheduling class, which excludes temporary PI boosts, and
* does not hold the scheduler lock.
* scheduling class, which excludes temporary PI boosts.
*/
if (sched_class == curr->base_class &&
sched_class->sched_tick &&
......@@ -419,8 +463,7 @@ int evl_check_schedparams(struct evl_sched_class *sched_class,
{
int ret = 0;
assert_evl_lock(&thread->lock);
requires_ugly_lock();
assert_thread_pinned(thread);
if (sched_class->sched_chkparam)
ret = sched_class->sched_chkparam(thread, p);
......@@ -435,8 +478,7 @@ int evl_declare_thread(struct evl_sched_class *sched_class,
{
int ret;
assert_evl_lock(&thread->lock);
requires_ugly_lock();
assert_thread_pinned(thread);
if (sched_class->sched_declare) {
ret = sched_class->sched_declare(thread, p);
......@@ -459,8 +501,7 @@ static __always_inline void evl_enqueue_thread(struct evl_thread *thread)
{
struct evl_sched_class *sched_class = thread->sched_class;
assert_evl_lock(&thread->lock);
requires_ugly_lock();
assert_thread_pinned(thread);
/*
* Enqueue for next pick: i.e. move to end of current priority
......@@ -476,8 +517,7 @@ static __always_inline void evl_dequeue_thread(struct evl_thread *thread)
{
struct evl_sched_class *sched_class = thread->sched_class;
assert_evl_lock(&thread->lock);
requires_ugly_lock();
assert_thread_pinned(thread);
/*
* Pull from the runnable thread queue.
......@@ -492,8 +532,7 @@ static __always_inline void evl_requeue_thread(struct evl_thread *thread)
{
struct evl_sched_class *sched_class = thread->sched_class;
assert_evl_lock(&thread->lock);
requires_ugly_lock();
assert_thread_pinned(thread);
/*
* Put back at same place: i.e. requeue to head of current
......@@ -509,8 +548,7 @@ static inline
bool evl_set_schedparam(struct evl_thread *thread,
const union evl_sched_param *p)
{
assert_evl_lock(&thread->lock);
requires_ugly_lock();
assert_thread_pinned(thread);
return thread->base_class->sched_setparam(thread, p);
}
......@@ -518,8 +556,7 @@ bool evl_set_schedparam(struct evl_thread *thread,
static inline void evl_get_schedparam(struct evl_thread *thread,
union evl_sched_param *p)
{
assert_evl_lock(&thread->lock);
requires_ugly_lock();
assert_thread_pinned(thread);
thread->sched_class->sched_getparam(thread, p);
}
......@@ -527,8 +564,7 @@ static inline void evl_get_schedparam(struct evl_thread *thread,
static inline void evl_track_priority(struct evl_thread *thread,
const union evl_sched_param *p)
{
assert_evl_lock(&thread->lock);
requires_ugly_lock();
assert_thread_pinned(thread);
thread->sched_class->sched_trackprio(thread, p);
thread->wprio = evl_calc_weighted_prio(thread->sched_class, thread->cprio);
......@@ -536,8 +572,7 @@ static inline void evl_track_priority(struct evl_thread *thread,
static inline void evl_ceil_priority(struct evl_thread *thread, int prio)
{
assert_evl_lock(&thread->lock);
requires_ugly_lock();
assert_thread_pinned(thread);
thread->sched_class->sched_ceilprio(thread, prio);
thread->wprio = evl_calc_weighted_prio(thread->sched_class, thread->cprio);
......@@ -547,8 +582,7 @@ static inline void evl_forget_thread(struct evl_thread *thread)
{
struct evl_sched_class *sched_class = thread->base_class;
assert_evl_lock(&thread->lock);
requires_ugly_lock();
assert_thread_pinned(thread);
--sched_class->nthreads;
......@@ -560,8 +594,7 @@ static inline void evl_force_thread(struct evl_thread *thread)
{
struct evl_sched_class *sched_class = thread->base_class;
assert_evl_lock(&thread->lock);
requires_ugly_lock();
assert_thread_pinned(thread);
thread->info |= T_KICKED;
......
......@@ -86,7 +86,8 @@ struct evl_thread {
ktime_t rrperiod; /* Round-robin period (ns) */
/*
* Shared data, covered by both thread->lock AND nklock.
* Shared data, covered by both thread->lock AND
* thread->rq->lock.
*/
__u32 state;
__u32 info;
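The "covered by both thread->lock AND thread->rq->lock" rule above presumably means writers take both locks, so holding either one is enough for a stable read; a sketch under that assumption (hypothetical helper, not part of the patch):

static __u32 read_thread_state_sketch(struct evl_thread *thread)	/* hypothetical */
{
	unsigned long flags;
	__u32 state;

	evl_spin_lock_irqsave(&thread->lock, flags);	/* either lock would do */
	state = thread->state;
	evl_spin_unlock_irqrestore(&thread->lock, flags);

	return state;
}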
......@@ -100,10 +101,12 @@ struct evl_thread {
struct list_head tp_link; /* evl_rq->tp.threads */
#endif
struct list_head rq_next; /* evl_rq->policy.runqueue */
struct list_head next; /* in evl_thread_list */
/*
* Thread-local data the owner may modified locklessly.
* Thread-local data the owner may modify locklessly.
*/
struct dovetail_altsched_context altsched;
__u32 local_info;
void *wait_data;
struct {
......@@ -112,6 +115,7 @@ struct evl_thread {
int nr;
} poll_context;
atomic_t inband_disable_count;
struct irq_work inband_work;
struct {
struct evl_counter isw; /* in-band switches */
struct evl_counter csw; /* context switches */
......@@ -120,23 +124,18 @@ struct evl_thread {
struct evl_account account; /* exec time accounting */
struct evl_account lastperiod;
} stat;
struct evl_user_window *u_window;
/* Misc stuff. */
struct list_head trackers; /* Mutexes tracking @thread */
hard_spinlock_t tracking_lock;
struct list_head next; /* in evl_thread_list */
struct dovetail_altsched_context altsched;
struct evl_element element;
struct cpumask affinity;
char *name;
struct completion exited;
struct irq_work inband_work;
kernel_cap_t raised_cap;
struct evl_user_window *u_window;
struct list_head kill_next;
char *name;
};
struct evl_kthread {
......@@ -183,6 +182,10 @@ void __evl_test_cancel(struct evl_thread *curr);
void evl_discard_thread(struct evl_thread *thread);
/*
* Might differ from this_evl_rq() if @current is running inband, and
* evl_migrate_thread() is pending until it switches back to oob.
*/
static inline struct evl_thread *evl_current(void)
{
return dovetail_current_state()->thread;
......@@ -260,10 +263,6 @@ int evl_join_thread(struct evl_thread *thread,
void evl_get_thread_state(struct evl_thread *thread,
struct evl_thread_state *statebuf);
int evl_switch_oob(void);
void evl_switch_inband(int cause);
int evl_detach_self(void);
void evl_kick_thread(struct evl_thread *thread);
......
......@@ -45,7 +45,7 @@ static inline void evl_notify_proxy_tick(struct evl_rq *this_rq)
* previous set_next_ktime() request received from the kernel
* we have carried out using our core timing services.
*/
this_rq->lflags &= ~RQ_TPROXY;
this_rq->local_flags &= ~RQ_TPROXY;
tick_notify_proxy();
}
......
......@@ -319,15 +319,10 @@ static inline ktime_t evl_get_timer_expiry(struct evl_timer *timer)
evl_get_timer_gravity(timer));
}
/* no lock required. */
ktime_t evl_get_timer_date(struct evl_timer *timer);
/* no lock required. */
ktime_t __evl_get_timer_delta(struct evl_timer *timer);
ktime_t xntimer_get_interval(struct evl_timer *timer);
/* no lock required. */
static inline ktime_t evl_get_timer_delta(struct evl_timer *timer)
{
if (!evl_timer_is_running(timer))
......@@ -336,7 +331,6 @@ static inline ktime_t evl_get_timer_delta(struct evl_timer *timer)
return __evl_get_timer_delta(timer);
}
/* no lock required. */
static inline
ktime_t __evl_get_stopped_timer_delta(struct evl_timer *timer)
{
......@@ -382,13 +376,6 @@ void evl_move_timer(struct evl_timer *timer,
#ifdef CONFIG_SMP
static inline void evl_set_timer_rq(struct evl_timer *timer,
struct evl_rq *rq)
{
if (rq != timer->rq)
evl_move_timer(timer, timer->clock, rq);
}
static inline void evl_prepare_timed_wait(struct evl_timer *timer,
struct evl_clock *clock,
struct evl_rq *rq)
......@@ -406,10 +393,6 @@ static inline bool evl_timer_on_rq(struct evl_timer *timer,
#else /* ! CONFIG_SMP */
static inline void evl_set_timer_rq(struct evl_timer *timer,
struct evl_rq *rq)
{ }
static inline void evl_prepare_timed_wait(struct evl_timer *timer,
struct evl_clock *clock,
struct evl_rq *rq)
......
......@@ -217,34 +217,32 @@ DECLARE_EVENT_CLASS(evl_clock_ident,
TP_printk("name=%s", __get_str(name))
);
TRACE_EVENT(evl_schedule,
DECLARE_EVENT_CLASS(evl_schedule_event,
TP_PROTO(struct evl_rq *rq),
TP_ARGS(rq),
TP_STRUCT__entry(
__field(unsigned long, status)
__field(unsigned long, flags)
__field(unsigned long, local_flags)
),
TP_fast_assign(
__entry->status = rq->status;
__entry->flags = rq->flags;
__entry->local_flags = rq->local_flags;
),
TP_printk("status=%#lx", __entry->status)
TP_printk("flags=%#lx, local_flags=%#lx",
__entry->flags, __entry->local_flags)
);
TRACE_EVENT(evl_schedule_remote,
DEFINE_EVENT(evl_schedule_event, evl_schedule,
TP_PROTO(struct evl_rq *rq),
TP_ARGS(rq),
TP_STRUCT__entry(
__field(unsigned long, status)
),
TP_fast_assign(
__entry->status = rq->status;
),
TP_ARGS(rq)
);
TP_printk("status=%#lx", __entry->status)
DEFINE_EVENT(evl_schedule_event, evl_reschedule_ipi,
TP_PROTO(struct evl_rq *rq),
TP_ARGS(rq)
);
TRACE_EVENT(evl_switch_context,
......
......@@ -300,7 +300,7 @@ static void do_clock_tick(struct evl_clock *clock, struct evl_timerbase *tmb)
* handler. This is a hint for the program_local_shot()
* handler of the ticking clock.
*/
rq->lflags |= RQ_TIMER;
rq->local_flags |= RQ_TIMER;
now = evl_read_clock(clock);
while ((tn = evl_get_tqueue_head(tq)) != NULL) {
......@@ -319,8 +319,8 @@ static void do_clock_tick(struct evl_clock *clock, struct evl_timerbase *tmb)
* of the core tick interrupt.
*/
if (unlikely(timer == &rq->inband_timer)) {
rq->lflags |= RQ_TPROXY;
rq->lflags &= ~RQ_TDEFER;
rq->local_flags |= RQ_TPROXY;