Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/Makefile             |   1
-rw-r--r--  kernel/sched/core.c               |  59
-rw-r--r--  kernel/sched/cpufreq.c            |  17
-rw-r--r--  kernel/sched/cpufreq_schedutil.c  |   8
-rw-r--r--  kernel/sched/fair.c               |   7
-rw-r--r--  kernel/sched/idle.c               |   3
-rw-r--r--  kernel/sched/tune_dummy.c         | 122
7 files changed, 202 insertions, 15 deletions
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index e9adba01c456..5bd0fbecc37b 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
 obj-$(CONFIG_SCHED_TUNE) += tune.o
+obj-$(CONFIG_SCHED_TUNE_DUMMY) += tune_dummy.o
 obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
 obj-$(CONFIG_CPU_FREQ) += cpufreq.o
 obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2e3c650573bb..5f832ae982e6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3544,6 +3544,50 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
         prepare_arch_switch(next);
 }
 
+void release_task_stack(struct task_struct *tsk);
+static void task_async_free(struct work_struct *work)
+{
+        struct task_struct *t = container_of(work, typeof(*t), async_free.work);
+        bool free_stack = READ_ONCE(t->async_free.free_stack);
+
+        atomic_set(&t->async_free.running, 0);
+
+        if (free_stack) {
+                release_task_stack(t);
+                put_task_struct(t);
+        } else {
+                __put_task_struct(t);
+        }
+}
+
+static void finish_task_switch_dead(struct task_struct *prev)
+{
+        if (atomic_cmpxchg(&prev->async_free.running, 0, 1)) {
+                put_task_stack(prev);
+                put_task_struct(prev);
+                return;
+        }
+
+        if (atomic_dec_and_test(&prev->stack_refcount)) {
+                prev->async_free.free_stack = true;
+        } else if (atomic_dec_and_test(&prev->usage)) {
+                prev->async_free.free_stack = false;
+        } else {
+                atomic_set(&prev->async_free.running, 0);
+                return;
+        }
+
+        INIT_WORK(&prev->async_free.work, task_async_free);
+        queue_work(system_unbound_wq, &prev->async_free.work);
+}
+
+static void mmdrop_async_free(struct work_struct *work)
+{
+        struct mm_struct *mm = container_of(work, typeof(*mm), async_put_work);
+
+        __mmdrop(mm);
+}
+
 /**
  * finish_task_switch - clean up after a task-switch
  * @prev: the thread we just switched away from.
@@ -3617,8 +3661,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
         kcov_finish_switch(current);
 
         fire_sched_in_preempt_notifiers(current);
-        if (mm)
-                mmdrop(mm);
+        if (mm && atomic_dec_and_test(&mm->mm_count)) {
+                INIT_WORK(&mm->async_put_work, mmdrop_async_free);
+                queue_work(system_unbound_wq, &mm->async_put_work);
+        }
         if (unlikely(prev_state == TASK_DEAD)) {
                 if (prev->sched_class->task_dead)
                         prev->sched_class->task_dead(prev);
@@ -3629,11 +3675,7 @@
                  */
                 kprobe_flush_task(prev);
 
-                /* Task is done with its stack. */
-                put_task_stack(prev);
-
-                put_task_struct(prev);
-
+                finish_task_switch_dead(prev);
         }
 
         tick_nohz_task_switch();
@@ -5002,7 +5044,8 @@ static void __setscheduler_params(struct task_struct *p,
         if (policy == SETPARAM_POLICY)
                 policy = p->policy;
 
-        p->policy = policy;
+        /* Replace SCHED_FIFO with SCHED_RR to reduce latency */
+        p->policy = policy == SCHED_FIFO ? SCHED_RR : policy;
 
         if (dl_policy(policy))
                 __setparam_dl(p, attr);
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index dbc51442ecbc..c2129347f793 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -8,6 +8,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 
+#include <linux/cpufreq.h>
 #include "sched.h"
 
@@ -61,3 +62,19 @@ void cpufreq_remove_update_util_hook(int cpu)
         rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), NULL);
 }
 EXPORT_SYMBOL_GPL(cpufreq_remove_update_util_hook);
+
+/**
+ * cpufreq_can_do_remote_dvfs - Check if cpufreq policy can be updated.
+ * @policy: cpufreq policy to check.
+ *
+ * Return 'true' if:
+ * - the local and remote CPUs share @policy,
+ * - dvfs_possible_from_any_cpu is set in @policy and the local CPU is not going
+ *   offline (in which case it is not expected to run cpufreq updates any more).
+ */
+bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy)
+{
+        return cpumask_test_cpu(smp_processor_id(), policy->cpus) ||
+                (policy->dvfs_possible_from_any_cpu &&
+                 rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)));
+}
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index d92acbfece89..83b58e7fa864 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -108,12 +108,10 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
         * by the hardware, as calculating the frequency is pointless if
         * we cannot in fact act on it.
         *
-        * For the slow switching platforms, the kthread is always scheduled on
-        * the right set of CPUs and any CPU can find the next frequency and
-        * schedule the kthread.
+        * This is needed on the slow switching platforms too, to prevent CPUs
+        * going offline from leaving stale IRQ work items behind.
         */
-        if (sg_policy->policy->fast_switch_enabled &&
-            !cpufreq_can_do_remote_dvfs(sg_policy->policy))
+        if (!cpufreq_can_do_remote_dvfs(sg_policy->policy))
                 return false;
 
         if (unlikely(sg_policy->need_freq_update)) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f676735e500a..0b521c46a86e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7822,6 +7822,13 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu,
                         continue;
 
                 /*
+                 * Skip searching for an active CPU for tasks that have
+                 * high priority & prefer_high_cap.
+                 */
+                if (prefer_high_cap && p->prio <= DEFAULT_PRIO)
+                        continue;
+
+                /*
                  * Case A.2: Target ACTIVE CPU
                  * Favor CPUs with max spare capacity.
                  */
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index c8f70ea89099..63b871ddce22 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -196,7 +196,7 @@ static void cpuidle_idle_call(void)
                 */
                next_state = cpuidle_select(drv, dev, &stop_tick);
 
-               if (stop_tick || tick_nohz_tick_stopped())
+               if (stop_tick)
                        tick_nohz_idle_stop_tick();
                else
                        tick_nohz_idle_retain_tick();
@@ -239,7 +239,6 @@ static void do_idle(void)
         */
 
        __current_set_polling();
-       quiet_vmstat();
        tick_nohz_idle_enter();
 
        while (!need_resched()) {
diff --git a/kernel/sched/tune_dummy.c b/kernel/sched/tune_dummy.c
new file mode 100644
index 000000000000..271e7d9cc2f3
--- /dev/null
+++ b/kernel/sched/tune_dummy.c
@@ -0,0 +1,122 @@
+#include <linux/cgroup.h>
+#include "sched.h"
+
+#define BOOSTGROUPS_COUNT 5
+
+struct schedtune {
+        /* SchedTune CGroup subsystem */
+        struct cgroup_subsys_state css;
+
+        /* Boost value for tasks on that SchedTune CGroup */
+        int boost;
+
+        /* Hint to bias scheduling of tasks on that SchedTune CGroup
+         * towards idle CPUs */
+        int prefer_idle;
+};
+
+static struct schedtune
+root_schedtune = {
+        .boost = 0,
+        .prefer_idle = 0,
+};
+
+static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
+        &root_schedtune,
+        NULL,
+};
+
+static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
+{
+        return container_of(css, struct schedtune, css);
+}
+
+static u64
+prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+        return 0;
+}
+
+static int
+prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft,
+                  u64 prefer_idle)
+{
+        return 0;
+}
+
+static s64
+boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+        return 0;
+}
+
+static int
+boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
+            s64 boost)
+{
+        return 0;
+}
+
+static struct cftype files[] = {
+        {
+                .name = "boost",
+                .read_s64 = boost_read,
+                .write_s64 = boost_write,
+        },
+        {
+                .name = "prefer_idle",
+                .read_u64 = prefer_idle_read,
+                .write_u64 = prefer_idle_write,
+        },
+        { }     /* terminate */
+};
+
+static struct cgroup_subsys_state *
+schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+        struct schedtune *st;
+        int idx;
+
+        if (!parent_css)
+                return &root_schedtune.css;
+
+        /* Allow only single level hierarchies */
+        if (parent_css != &root_schedtune.css) {
+                pr_err("Nested SchedTune boosting groups not allowed\n");
+                return ERR_PTR(-ENOMEM);
+        }
+
+        /* Allow only a limited number of boosting groups */
+        for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx)
+                if (!allocated_group[idx])
+                        break;
+        if (idx == BOOSTGROUPS_COUNT) {
+                pr_err("Trying to create more than %d SchedTune boosting groups\n",
+                       BOOSTGROUPS_COUNT);
+                return ERR_PTR(-ENOSPC);
+        }
+
+        st = kzalloc(sizeof(*st), GFP_KERNEL);
+        if (!st)
+                goto out;
+
+        return &st->css;
+
+out:
+        return ERR_PTR(-ENOMEM);
+}
+
+static void
+schedtune_css_free(struct cgroup_subsys_state *css)
+{
+        struct schedtune *st = css_st(css);
+
+        kfree(st);
+}
+
+struct cgroup_subsys schedtune_cgrp_subsys = {
+        .css_alloc      = schedtune_css_alloc,
+        .css_free       = schedtune_css_free,
+        .legacy_cftypes = files,
+        .early_init     = 1,
+};
