Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/Makefile             |   1
-rw-r--r--  kernel/sched/core.c               |  59
-rw-r--r--  kernel/sched/cpufreq.c            |  17
-rw-r--r--  kernel/sched/cpufreq_schedutil.c  |   8
-rw-r--r--  kernel/sched/fair.c               |   7
-rw-r--r--  kernel/sched/idle.c               |   3
-rw-r--r--  kernel/sched/tune_dummy.c         | 122
7 files changed, 202 insertions(+), 15 deletions(-)
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index e9adba01c456..5bd0fbecc37b 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o
obj-$(CONFIG_SCHED_TUNE) += tune.o
+obj-$(CONFIG_SCHED_TUNE_DUMMY) += tune_dummy.o
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2e3c650573bb..5f832ae982e6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3544,6 +3544,50 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
prepare_arch_switch(next);
}
+void release_task_stack(struct task_struct *tsk);
+static void task_async_free(struct work_struct *work)
+{
+ struct task_struct *t = container_of(work, typeof(*t), async_free.work);
+ bool free_stack = READ_ONCE(t->async_free.free_stack);
+
+ atomic_set(&t->async_free.running, 0);
+
+ if (free_stack) {
+ release_task_stack(t);
+ put_task_struct(t);
+ } else {
+ __put_task_struct(t);
+ }
+}
+
+static void finish_task_switch_dead(struct task_struct *prev)
+{
+ if (atomic_cmpxchg(&prev->async_free.running, 0, 1)) {
+ put_task_stack(prev);
+ put_task_struct(prev);
+ return;
+ }
+
+ if (atomic_dec_and_test(&prev->stack_refcount)) {
+ prev->async_free.free_stack = true;
+ } else if (atomic_dec_and_test(&prev->usage)) {
+ prev->async_free.free_stack = false;
+ } else {
+ atomic_set(&prev->async_free.running, 0);
+ return;
+ }
+
+ INIT_WORK(&prev->async_free.work, task_async_free);
+ queue_work(system_unbound_wq, &prev->async_free.work);
+}
+
+static void mmdrop_async_free(struct work_struct *work)
+{
+ struct mm_struct *mm = container_of(work, typeof(*mm), async_put_work);
+
+ __mmdrop(mm);
+}
+
/**
* finish_task_switch - clean up after a task-switch
* @prev: the thread we just switched away from.
@@ -3617,8 +3661,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
kcov_finish_switch(current);
fire_sched_in_preempt_notifiers(current);
- if (mm)
- mmdrop(mm);
+ if (mm && atomic_dec_and_test(&mm->mm_count)) {
+ INIT_WORK(&mm->async_put_work, mmdrop_async_free);
+ queue_work(system_unbound_wq, &mm->async_put_work);
+ }
if (unlikely(prev_state == TASK_DEAD)) {
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
@@ -3629,11 +3675,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
*/
kprobe_flush_task(prev);
- /* Task is done with its stack. */
- put_task_stack(prev);
-
- put_task_struct(prev);
-
+ finish_task_switch_dead(prev);
}
tick_nohz_task_switch();
@@ -5002,7 +5044,8 @@ static void __setscheduler_params(struct task_struct *p,
if (policy == SETPARAM_POLICY)
policy = p->policy;
- p->policy = policy;
+ /* Replace SCHED_FIFO with SCHED_RR so FIFO tasks cannot monopolize the CPU and hurt latency */
+ p->policy = policy == SCHED_FIFO ? SCHED_RR : policy;
if (dl_policy(policy))
__setparam_dl(p, attr);
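
The core.c hunks above move the final teardown of a dead task (its stack and task_struct) and the last mmdrop() off the context-switch path onto system_unbound_wq. The same deferred-free pattern, reduced to its essentials, is sketched below; struct foo, foo_free_fn() and foo_release() are illustrative names, not part of this patch, and only the standard workqueue API is assumed.

#include <linux/slab.h>
#include <linux/workqueue.h>

struct foo {
	struct work_struct free_work;
	/* ... payload ... */
};

/* Runs later in process context on an unbound workqueue. */
static void foo_free_fn(struct work_struct *work)
{
	struct foo *f = container_of(work, struct foo, free_work);

	kfree(f);
}

/* Called from a hot path: only queues the work, does not free. */
static void foo_release(struct foo *f)
{
	INIT_WORK(&f->free_work, foo_free_fn);
	queue_work(system_unbound_wq, &f->free_work);
}
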
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index dbc51442ecbc..c2129347f793 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -8,6 +8,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#include <linux/cpufreq.h>
#include "sched.h"
@@ -61,3 +62,19 @@ void cpufreq_remove_update_util_hook(int cpu)
rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), NULL);
}
EXPORT_SYMBOL_GPL(cpufreq_remove_update_util_hook);
+
+/**
+ * cpufreq_can_do_remote_dvfs - Check if cpufreq policy can be updated.
+ * @policy: cpufreq policy to check.
+ *
+ * Return 'true' if:
+ * - the local and remote CPUs share @policy,
+ * - dvfs_possible_from_any_cpu is set in @policy and the local CPU is not going
+ * offline (in which case it is not expected to run cpufreq updates any more).
+ */
+bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy)
+{
+ return cpumask_test_cpu(smp_processor_id(), policy->cpus) ||
+ (policy->dvfs_possible_from_any_cpu &&
+ rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)));
+}
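
cpufreq_can_do_remote_dvfs() keys off the per-CPU cpufreq_update_util_data pointer installed by cpufreq_add_update_util_hook(): it is non-NULL only while a governor hook is registered on the local CPU, i.e. while that CPU is still expected to process cpufreq updates. A minimal sketch of the registration side, using the hook API this file already exports (the my_* names are illustrative, not part of the patch):

static DEFINE_PER_CPU(struct update_util_data, my_hook);

static void my_update_hook(struct update_util_data *data, u64 time,
			   unsigned int flags)
{
	/* Evaluate utilization and trigger a frequency update here. */
}

static void my_gov_start(int cpu)
{
	/*
	 * While this hook is installed, cpufreq_can_do_remote_dvfs()
	 * can report true for remote updates targeting @cpu.
	 */
	cpufreq_add_update_util_hook(cpu, &per_cpu(my_hook, cpu),
				     my_update_hook);
}

static void my_gov_stop(int cpu)
{
	cpufreq_remove_update_util_hook(cpu);
	synchronize_sched();	/* wait for in-flight callbacks */
}
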
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index d92acbfece89..83b58e7fa864 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -108,12 +108,10 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
* by the hardware, as calculating the frequency is pointless if
* we cannot in fact act on it.
*
- * For the slow switching platforms, the kthread is always scheduled on
- * the right set of CPUs and any CPU can find the next frequency and
- * schedule the kthread.
+ * This is needed on the slow switching platforms too to prevent CPUs
+ * going offline from leaving stale IRQ work items behind.
*/
- if (sg_policy->policy->fast_switch_enabled &&
- !cpufreq_can_do_remote_dvfs(sg_policy->policy))
+ if (!cpufreq_can_do_remote_dvfs(sg_policy->policy))
return false;
if (unlikely(sg_policy->need_freq_update)) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f676735e500a..0b521c46a86e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7822,6 +7822,13 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu,
continue;
/*
+ * Skip the active-CPU search for tasks that have
+ * high priority and prefer_high_cap set.
+ */
+ if (prefer_high_cap && p->prio <= DEFAULT_PRIO)
+ continue;
+
+ /*
* Case A.2: Target ACTIVE CPU
* Favor CPUs with max spare capacity.
*/
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index c8f70ea89099..63b871ddce22 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -196,7 +196,7 @@ static void cpuidle_idle_call(void)
*/
next_state = cpuidle_select(drv, dev, &stop_tick);
- if (stop_tick || tick_nohz_tick_stopped())
+ if (stop_tick)
tick_nohz_idle_stop_tick();
else
tick_nohz_idle_retain_tick();
@@ -239,7 +239,6 @@ static void do_idle(void)
*/
__current_set_polling();
- quiet_vmstat();
tick_nohz_idle_enter();
while (!need_resched()) {
diff --git a/kernel/sched/tune_dummy.c b/kernel/sched/tune_dummy.c
new file mode 100644
index 000000000000..271e7d9cc2f3
--- /dev/null
+++ b/kernel/sched/tune_dummy.c
@@ -0,0 +1,122 @@
+#include <linux/cgroup.h>
+#include "sched.h"
+
+#define BOOSTGROUPS_COUNT 5
+
+struct schedtune {
+ /* SchedTune CGroup subsystem */
+ struct cgroup_subsys_state css;
+
+ /* Boost value for tasks on that SchedTune CGroup */
+ int boost;
+
+ /* Hint to bias scheduling of tasks on that SchedTune CGroup
+ * towards idle CPUs */
+ int prefer_idle;
+};
+
+static struct schedtune
+root_schedtune = {
+ .boost = 0,
+ .prefer_idle = 0,
+};
+
+static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
+ &root_schedtune,
+ NULL,
+};
+
+static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
+{
+ return container_of(css, struct schedtune, css);
+}
+
+static u64
+prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ return 0;
+}
+
+static int
+prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft,
+ u64 prefer_idle)
+{
+ return 0;
+}
+
+static s64
+boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ return 0;
+}
+
+static int
+boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
+ s64 boost)
+{
+ return 0;
+}
+
+static struct cftype files[] = {
+ {
+ .name = "boost",
+ .read_s64 = boost_read,
+ .write_s64 = boost_write,
+ },
+ {
+ .name = "prefer_idle",
+ .read_u64 = prefer_idle_read,
+ .write_u64 = prefer_idle_write,
+ },
+ { } /* terminate */
+};
+
+static struct cgroup_subsys_state *
+schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+ struct schedtune *st;
+ int idx;
+
+ if (!parent_css)
+ return &root_schedtune.css;
+
+ /* Allow only single-level hierarchies */
+ if (parent_css != &root_schedtune.css) {
+ pr_err("Nested SchedTune boosting groups not allowed\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Allow only a limited number of boosting groups */
+ for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx)
+ if (!allocated_group[idx])
+ break;
+ if (idx == BOOSTGROUPS_COUNT) {
+ pr_err("Trying to create more than %d SchedTune boosting groups\n",
+ BOOSTGROUPS_COUNT);
+ return ERR_PTR(-ENOSPC);
+ }
+
+ st = kzalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto out;
+
+ return &st->css;
+
+out:
+ return ERR_PTR(-ENOMEM);
+}
+
+static void
+schedtune_css_free(struct cgroup_subsys_state *css)
+{
+ struct schedtune *st = css_st(css);
+
+ kfree(st);
+}
+
+struct cgroup_subsys schedtune_cgrp_subsys = {
+ .css_alloc = schedtune_css_alloc,
+ .css_free = schedtune_css_free,
+ .legacy_cftypes = files,
+ .early_init = 1,
+};
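
tune_dummy.c keeps the schedtune cgroup hierarchy and its boost/prefer_idle attribute files mountable and writable while every read returns 0, so userspace written against the full SchedTune controller keeps working unchanged. For context, a hedged sketch of how scheduler code typically resolves a task to its schedtune group (the pattern used by the non-dummy tune.c); schedtune_task_boost() is an illustrative helper, not part of this patch, and it assumes schedtune_cgrp_id is generated for this controller via cgroup_subsys.h:

static inline int schedtune_task_boost(struct task_struct *p)
{
	struct schedtune *st;
	int boost;

	/* Resolve the task's schedtune css under RCU protection. */
	rcu_read_lock();
	st = css_st(task_css(p, schedtune_cgrp_id));
	boost = st->boost;	/* always 0 with the dummy controller */
	rcu_read_unlock();

	return boost;
}
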