| field | value | date |
|---|---|---|
| author | spkal01 <kalligeross@gmail.com> | 2021-05-17 02:37:28 +0530 |
| committer | spkal01 <kalligeross@gmail.com> | 2021-05-17 02:37:28 +0530 |
| commit | 93b265ae2eba8d93d0ffa406958547232f3114c8 (patch) | |
| tree | c2f093aa144f732b5cf7bd8a0b45bf35eda42e1c /kernel | |
| parent | 0a82617b8fce8994076b518064e7d420af290ea8 (diff) | |
| parent | 016f4ba70bffb6d02725e778c3989fa542e6d12a (diff) | |
Diffstat (limited to 'kernel')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | kernel/cgroup/cpuset.c | 53 |
| -rw-r--r-- | kernel/fork.c | 2 |
| -rw-r--r-- | kernel/locking/mutex.c | 25 |
| -rw-r--r-- | kernel/locking/rwsem-xadd.c | 26 |
| -rw-r--r-- | kernel/sched/Makefile | 1 |
| -rw-r--r-- | kernel/sched/core.c | 59 |
| -rw-r--r-- | kernel/sched/cpufreq.c | 17 |
| -rw-r--r-- | kernel/sched/cpufreq_schedutil.c | 8 |
| -rw-r--r-- | kernel/sched/fair.c | 7 |
| -rw-r--r-- | kernel/sched/idle.c | 3 |
| -rw-r--r-- | kernel/sched/tune_dummy.c | 122 |
| -rw-r--r-- | kernel/time/tick-sched.c | 2 |
12 files changed, 237 insertions, 88 deletions
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index ec0563b4484d..b04cd4c051eb 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1879,45 +1879,6 @@ int cpu_uclamp_boost_write_u64_wrapper(struct cgroup_subsys_state *css,
                                        struct cftype *cftype, u64 boost);
 u64 cpu_uclamp_boost_read_u64_wrapper(struct cgroup_subsys_state *css,
                                       struct cftype *cft);
-
-#if !defined(CONFIG_SCHED_TUNE)
-static u64 st_boost_read(struct cgroup_subsys_state *css,
-                         struct cftype *cft)
-{
-        if (!strlen(css->cgroup->kn->name))
-                return -EINVAL;
-
-        return cpu_uclamp_boost_read_u64_wrapper(css, cft);
-}
-
-static int st_boost_write(struct cgroup_subsys_state *css,
-                          struct cftype *cft, u64 boost)
-{
-        if (!strlen(css->cgroup->kn->name))
-                return -EINVAL;
-
-        return cpu_uclamp_boost_write_u64_wrapper(css, cft, boost);
-}
-
-static u64 st_prefer_idle_read(struct cgroup_subsys_state *css,
-                               struct cftype *cft)
-{
-        if (!strlen(css->cgroup->kn->name))
-                return -EINVAL;
-
-        return cpu_uclamp_ls_read_u64_wrapper(css, cft);
-}
-
-static int st_prefer_idle_write(struct cgroup_subsys_state *css,
-                                struct cftype *cft, u64 prefer_idle)
-{
-        if (!strlen(css->cgroup->kn->name))
-                return -EINVAL;
-
-        return cpu_uclamp_ls_write_u64_wrapper(css, cft, prefer_idle);
-}
-#endif
-
 #endif
 
 /*
@@ -2047,20 +2008,6 @@ static struct cftype files[] = {
                 .read_u64 = cpu_uclamp_boost_read_u64_wrapper,
                 .write_u64 = cpu_uclamp_boost_write_u64_wrapper,
         },
-
-#if !defined(CONFIG_SCHED_TUNE)
-        {
-                .name = "schedtune.boost",
-                .read_u64 = st_boost_read,
-                .write_u64 = st_boost_write,
-        },
-        {
-                .name = "schedtune.prefer_idle",
-                .read_u64 = st_prefer_idle_read,
-                .write_u64 = st_prefer_idle_write,
-        },
-#endif
-
 #endif
         { }     /* terminate */
 };
diff --git a/kernel/fork.c b/kernel/fork.c
index b46d05a9a7ff..20d04ce1374d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -342,7 +342,7 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
         }
 }
 
-static void release_task_stack(struct task_struct *tsk)
+void release_task_stack(struct task_struct *tsk)
 {
         if (WARN_ON(tsk->state != TASK_DEAD))
                 return;  /* Better to leak the stack than to free prematurely */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 57e28af96c5b..6dba40af542c 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -427,21 +427,31 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
 {
         bool ret = true;
 
-        rcu_read_lock();
-        while (__mutex_owner(lock) == owner) {
+        for (;;) {
+                unsigned int cpu;
+                bool same_owner;
+
                 /*
-                 * Ensure we emit the owner->on_cpu, dereference _after_
-                 * checking lock->owner still matches owner. If that fails,
+                 * Ensure lock->owner still matches owner. If that fails,
                  * owner might point to freed memory. If it still matches,
                  * the rcu_read_lock() ensures the memory stays valid.
                  */
-                barrier();
+                rcu_read_lock();
+                same_owner = __mutex_owner(lock) == owner;
+                if (same_owner) {
+                        ret = owner->on_cpu;
+                        if (ret)
+                                cpu = task_cpu(owner);
+                }
+                rcu_read_unlock();
+
+                if (!ret || !same_owner)
+                        break;
 
                 /*
                  * Use vcpu_is_preempted to detect lock holder preemption issue.
                  */
-                if (!owner->on_cpu || need_resched() ||
-                                vcpu_is_preempted(task_cpu(owner))) {
+                if (need_resched() || vcpu_is_preempted(cpu)) {
                         ret = false;
                         break;
                 }
@@ -453,7 +463,6 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
 
                 cpu_relax();
         }
-        rcu_read_unlock();
 
         return ret;
 }
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 350861f0da16..8917e6ceb3d0 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -400,32 +400,36 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
 {
         struct task_struct *owner = READ_ONCE(sem->owner);
 
-        if (!is_rwsem_owner_spinnable(owner))
+        if (!owner || !is_rwsem_owner_spinnable(owner))
                 return false;
 
-        rcu_read_lock();
-        while (owner && (READ_ONCE(sem->owner) == owner)) {
+        while (true) {
+                bool on_cpu, same_owner;
+
                 /*
-                 * Ensure we emit the owner->on_cpu, dereference _after_
-                 * checking sem->owner still matches owner, if that fails,
+                 * Ensure sem->owner still matches owner. If that fails,
                  * owner might point to free()d memory, if it still matches,
                  * the rcu_read_lock() ensures the memory stays valid.
                  */
-                barrier();
+                rcu_read_lock();
+                same_owner = sem->owner == owner;
+                if (same_owner)
+                        on_cpu = owner->on_cpu &&
+                                 !vcpu_is_preempted(task_cpu(owner));
+                rcu_read_unlock();
+
+                if (!same_owner)
+                        break;
 
                 /*
                  * abort spinning when need_resched or owner is not running or
                  * owner's cpu is preempted.
                  */
-                if (!owner->on_cpu || need_resched() ||
-                    vcpu_is_preempted(task_cpu(owner))) {
-                        rcu_read_unlock();
+                if (!on_cpu || need_resched())
                         return false;
-                }
 
                 cpu_relax();
         }
-        rcu_read_unlock();
 
         /*
          * If there is a new owner or the owner is not set, we continue
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index e9adba01c456..5bd0fbecc37b 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
 obj-$(CONFIG_SCHED_TUNE) += tune.o
+obj-$(CONFIG_SCHED_TUNE_DUMMY) += tune_dummy.o
 obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
 obj-$(CONFIG_CPU_FREQ) += cpufreq.o
 obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2e3c650573bb..5f832ae982e6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3544,6 +3544,50 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
         prepare_arch_switch(next);
 }
 
+void release_task_stack(struct task_struct *tsk);
+static void task_async_free(struct work_struct *work)
+{
+        struct task_struct *t = container_of(work, typeof(*t), async_free.work);
+        bool free_stack = READ_ONCE(t->async_free.free_stack);
+
+        atomic_set(&t->async_free.running, 0);
+
+        if (free_stack) {
+                release_task_stack(t);
+                put_task_struct(t);
+        } else {
+                __put_task_struct(t);
+        }
+}
+
+static void finish_task_switch_dead(struct task_struct *prev)
+{
+        if (atomic_cmpxchg(&prev->async_free.running, 0, 1)) {
+                put_task_stack(prev);
+                put_task_struct(prev);
+                return;
+        }
+
+        if (atomic_dec_and_test(&prev->stack_refcount)) {
+                prev->async_free.free_stack = true;
+        } else if (atomic_dec_and_test(&prev->usage)) {
+                prev->async_free.free_stack = false;
+        } else {
+                atomic_set(&prev->async_free.running, 0);
+                return;
+        }
+
+        INIT_WORK(&prev->async_free.work, task_async_free);
+        queue_work(system_unbound_wq, &prev->async_free.work);
+}
+
+static void mmdrop_async_free(struct work_struct *work)
+{
+        struct mm_struct *mm = container_of(work, typeof(*mm), async_put_work);
+
+        __mmdrop(mm);
+}
+
 /**
  * finish_task_switch - clean up after a task-switch
  * @prev: the thread we just switched away from.
@@ -3617,8 +3661,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
         kcov_finish_switch(current);
 
         fire_sched_in_preempt_notifiers(current);
-        if (mm)
-                mmdrop(mm);
+        if (mm && atomic_dec_and_test(&mm->mm_count)) {
+                INIT_WORK(&mm->async_put_work, mmdrop_async_free);
+                queue_work(system_unbound_wq, &mm->async_put_work);
+        }
         if (unlikely(prev_state == TASK_DEAD)) {
                 if (prev->sched_class->task_dead)
                         prev->sched_class->task_dead(prev);
@@ -3629,11 +3675,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
                  */
                 kprobe_flush_task(prev);
 
-                /* Task is done with its stack. */
-                put_task_stack(prev);
-
-                put_task_struct(prev);
-
+                finish_task_switch_dead(prev);
         }
 
         tick_nohz_task_switch();
@@ -5002,7 +5044,8 @@ static void __setscheduler_params(struct task_struct *p,
         if (policy == SETPARAM_POLICY)
                 policy = p->policy;
 
-        p->policy = policy;
+        /* Replace SCHED_FIFO with SCHED_RR to reduce latency */
+        p->policy = policy == SCHED_FIFO ? SCHED_RR : policy;
 
         if (dl_policy(policy))
                 __setparam_dl(p, attr);
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index dbc51442ecbc..c2129347f793 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -8,6 +8,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 
+#include <linux/cpufreq.h>
 #include "sched.h"
 
@@ -61,3 +62,19 @@ void cpufreq_remove_update_util_hook(int cpu)
         rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), NULL);
 }
 EXPORT_SYMBOL_GPL(cpufreq_remove_update_util_hook);
+
+/**
+ * cpufreq_can_do_remote_dvfs - Check if cpufreq policy can be updated.
+ * @policy: cpufreq policy to check.
+ *
+ * Return 'true' if:
+ * - the local and remote CPUs share @policy,
+ * - dvfs_possible_from_any_cpu is set in @policy and the local CPU is not going
+ *   offline (in which case it is not expected to run cpufreq updates any more).
+ */
+bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy)
+{
+        return cpumask_test_cpu(smp_processor_id(), policy->cpus) ||
+               (policy->dvfs_possible_from_any_cpu &&
+                rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)));
+}
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index d92acbfece89..83b58e7fa864 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -108,12 +108,10 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
          * by the hardware, as calculating the frequency is pointless if
          * we cannot in fact act on it.
          *
-         * For the slow switching platforms, the kthread is always scheduled on
-         * the right set of CPUs and any CPU can find the next frequency and
-         * schedule the kthread.
+         * This is needed on the slow switching platforms too to prevent CPUs
+         * going offline from leaving stale IRQ work items behind.
          */
-        if (sg_policy->policy->fast_switch_enabled &&
-            !cpufreq_can_do_remote_dvfs(sg_policy->policy))
+        if (!cpufreq_can_do_remote_dvfs(sg_policy->policy))
                 return false;
 
         if (unlikely(sg_policy->need_freq_update)) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f676735e500a..0b521c46a86e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7822,6 +7822,13 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu,
                                 continue;
 
                         /*
+                         * Skip searching for active CPU for tasks have
+                         * high priority & prefer_high_cap.
+                         */
+                        if (prefer_high_cap && p->prio <= DEFAULT_PRIO)
+                                continue;
+
+                        /*
                          * Case A.2: Target ACTIVE CPU
                          * Favor CPUs with max spare capacity.
                          */
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index c8f70ea89099..63b871ddce22 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -196,7 +196,7 @@ static void cpuidle_idle_call(void)
                  */
                 next_state = cpuidle_select(drv, dev, &stop_tick);
 
-                if (stop_tick || tick_nohz_tick_stopped())
+                if (stop_tick)
                         tick_nohz_idle_stop_tick();
                 else
                         tick_nohz_idle_retain_tick();
@@ -239,7 +239,6 @@ static void do_idle(void)
          */
 
         __current_set_polling();
-        quiet_vmstat();
         tick_nohz_idle_enter();
 
         while (!need_resched()) {
diff --git a/kernel/sched/tune_dummy.c b/kernel/sched/tune_dummy.c
new file mode 100644
index 000000000000..271e7d9cc2f3
--- /dev/null
+++ b/kernel/sched/tune_dummy.c
@@ -0,0 +1,122 @@
+#include <linux/cgroup.h>
+#include "sched.h"
+
+#define BOOSTGROUPS_COUNT 5
+
+struct schedtune {
+        /* SchedTune CGroup subsystem */
+        struct cgroup_subsys_state css;
+
+        /* Boost value for tasks on that SchedTune CGroup */
+        int boost;
+
+        /* Hint to bias scheduling of tasks on that SchedTune CGroup
+         * towards idle CPUs */
+        int prefer_idle;
+};
+
+static struct schedtune
+root_schedtune = {
+        .boost = 0,
+        .prefer_idle = 0,
+};
+
+static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
+        &root_schedtune,
+        NULL,
+};
+
+static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
+{
+        return container_of(css, struct schedtune, css);
+}
+
+static u64
+prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+        return 0;
+}
+
+static int
+prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft,
+                  u64 prefer_idle)
+{
+        return 0;
+}
+
+static s64
+boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+        return 0;
+}
+
+static int
+boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
+            s64 boost)
+{
+        return 0;
+}
+
+static struct cftype files[] = {
+        {
+                .name = "boost",
+                .read_s64 = boost_read,
+                .write_s64 = boost_write,
+        },
+        {
+                .name = "prefer_idle",
+                .read_u64 = prefer_idle_read,
+                .write_u64 = prefer_idle_write,
+        },
+        { }     /* terminate */
+};
+
+static struct cgroup_subsys_state *
+schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+        struct schedtune *st;
+        int idx;
+
+        if (!parent_css)
+                return &root_schedtune.css;
+
+        /* Allow only single level hierachies */
+        if (parent_css != &root_schedtune.css) {
+                pr_err("Nested SchedTune boosting groups not allowed\n");
+                return ERR_PTR(-ENOMEM);
+        }
+
+        /* Allow only a limited number of boosting groups */
+        for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx)
+                if (!allocated_group[idx])
+                        break;
+        if (idx == BOOSTGROUPS_COUNT) {
+                pr_err("Trying to create more than %d SchedTune boosting groups\n",
+                       BOOSTGROUPS_COUNT);
+                return ERR_PTR(-ENOSPC);
+        }
+
+        st = kzalloc(sizeof(*st), GFP_KERNEL);
+        if (!st)
+                goto out;
+
+        return &st->css;
+
+out:
+        return ERR_PTR(-ENOMEM);
+}
+
+static void
+schedtune_css_free(struct cgroup_subsys_state *css)
+{
+        struct schedtune *st = css_st(css);
+
+        kfree(st);
+}
+
+struct cgroup_subsys schedtune_cgrp_subsys = {
+        .css_alloc = schedtune_css_alloc,
+        .css_free = schedtune_css_free,
+        .legacy_cftypes = files,
+        .early_init = 1,
+};
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d8a2ae86ebd3..e9c7494ed8d9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -29,6 +29,7 @@
 #include <linux/timer.h>
 #include <linux/context_tracking.h>
 #include <linux/rq_stats.h>
+#include <linux/mm.h>
 
 #include <asm/irq_regs.h>
 
@@ -823,6 +824,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
         if (!ts->tick_stopped) {
                 calc_load_nohz_start();
                 cpu_load_update_nohz_start();
+                quiet_vmstat();
 
                 ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
                 ts->tick_stopped = 1;
