author      spkal01 <kalligeross@gmail.com>    2021-05-17 02:37:28 +0530
committer   spkal01 <kalligeross@gmail.com>    2021-05-17 02:37:28 +0530
commit      93b265ae2eba8d93d0ffa406958547232f3114c8 (patch)
tree        c2f093aa144f732b5cf7bd8a0b45bf35eda42e1c /kernel
parent      0a82617b8fce8994076b518064e7d420af290ea8 (diff)
parent      016f4ba70bffb6d02725e778c3989fa542e6d12a (diff)
Merge branch 'android11' of https://github.com/vantoman/kernel_xiaomi_sm6150 into HEAD (HEAD, r11.1)
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/cgroup/cpuset.c            |  53
-rw-r--r--   kernel/fork.c                     |   2
-rw-r--r--   kernel/locking/mutex.c            |  25
-rw-r--r--   kernel/locking/rwsem-xadd.c       |  26
-rw-r--r--   kernel/sched/Makefile             |   1
-rw-r--r--   kernel/sched/core.c               |  59
-rw-r--r--   kernel/sched/cpufreq.c            |  17
-rw-r--r--   kernel/sched/cpufreq_schedutil.c  |   8
-rw-r--r--   kernel/sched/fair.c               |   7
-rw-r--r--   kernel/sched/idle.c               |   3
-rw-r--r--   kernel/sched/tune_dummy.c         | 122
-rw-r--r--   kernel/time/tick-sched.c          |   2
12 files changed, 237 insertions(+), 88 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index ec0563b4484d..b04cd4c051eb 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1879,45 +1879,6 @@ int cpu_uclamp_boost_write_u64_wrapper(struct cgroup_subsys_state *css,
struct cftype *cftype, u64 boost);
u64 cpu_uclamp_boost_read_u64_wrapper(struct cgroup_subsys_state *css,
struct cftype *cft);
-
-#if !defined(CONFIG_SCHED_TUNE)
-static u64 st_boost_read(struct cgroup_subsys_state *css,
- struct cftype *cft)
-{
- if (!strlen(css->cgroup->kn->name))
- return -EINVAL;
-
- return cpu_uclamp_boost_read_u64_wrapper(css, cft);
-}
-
-static int st_boost_write(struct cgroup_subsys_state *css,
- struct cftype *cft, u64 boost)
-{
- if (!strlen(css->cgroup->kn->name))
- return -EINVAL;
-
- return cpu_uclamp_boost_write_u64_wrapper(css, cft, boost);
-}
-
-static u64 st_prefer_idle_read(struct cgroup_subsys_state *css,
- struct cftype *cft)
-{
- if (!strlen(css->cgroup->kn->name))
- return -EINVAL;
-
- return cpu_uclamp_ls_read_u64_wrapper(css, cft);
-}
-
-static int st_prefer_idle_write(struct cgroup_subsys_state *css,
- struct cftype *cft, u64 prefer_idle)
-{
- if (!strlen(css->cgroup->kn->name))
- return -EINVAL;
-
- return cpu_uclamp_ls_write_u64_wrapper(css, cft, prefer_idle);
-}
-#endif
-
#endif
/*
@@ -2047,20 +2008,6 @@ static struct cftype files[] = {
.read_u64 = cpu_uclamp_boost_read_u64_wrapper,
.write_u64 = cpu_uclamp_boost_write_u64_wrapper,
},
-
-#if !defined(CONFIG_SCHED_TUNE)
- {
- .name = "schedtune.boost",
- .read_u64 = st_boost_read,
- .write_u64 = st_boost_write,
- },
- {
- .name = "schedtune.prefer_idle",
- .read_u64 = st_prefer_idle_read,
- .write_u64 = st_prefer_idle_write,
- },
-#endif
-
#endif
{ } /* terminate */
};
diff --git a/kernel/fork.c b/kernel/fork.c
index b46d05a9a7ff..20d04ce1374d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -342,7 +342,7 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
}
}
-static void release_task_stack(struct task_struct *tsk)
+void release_task_stack(struct task_struct *tsk)
{
if (WARN_ON(tsk->state != TASK_DEAD))
return; /* Better to leak the stack than to free prematurely */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 57e28af96c5b..6dba40af542c 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -427,21 +427,31 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
{
bool ret = true;
- rcu_read_lock();
- while (__mutex_owner(lock) == owner) {
+ for (;;) {
+ unsigned int cpu;
+ bool same_owner;
+
/*
- * Ensure we emit the owner->on_cpu, dereference _after_
- * checking lock->owner still matches owner. If that fails,
+ * Ensure lock->owner still matches owner. If that fails,
* owner might point to freed memory. If it still matches,
* the rcu_read_lock() ensures the memory stays valid.
*/
- barrier();
+ rcu_read_lock();
+ same_owner = __mutex_owner(lock) == owner;
+ if (same_owner) {
+ ret = owner->on_cpu;
+ if (ret)
+ cpu = task_cpu(owner);
+ }
+ rcu_read_unlock();
+
+ if (!ret || !same_owner)
+ break;
/*
* Use vcpu_is_preempted to detect lock holder preemption issue.
*/
- if (!owner->on_cpu || need_resched() ||
- vcpu_is_preempted(task_cpu(owner))) {
+ if (need_resched() || vcpu_is_preempted(cpu)) {
ret = false;
break;
}
@@ -453,7 +463,6 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
cpu_relax();
}
- rcu_read_unlock();
return ret;
}
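
The mutex hunk above shrinks each RCU read-side critical section to a single snapshot: the owner pointer is re-validated and its on_cpu/CPU state is copied while rcu_read_lock() is held, and the need_resched()/preemption checks then run on the copies outside of it. A condensed kernel-context sketch of the resulting loop shape (the function name is illustrative, and the context between the two hunks is unchanged and omitted here):

/*
 * Snapshot the owner state under a short RCU window, then act on the
 * copies outside the critical section.
 */
static bool spin_on_owner_sketch(struct mutex *lock, struct task_struct *owner)
{
	bool ret = true;

	for (;;) {
		unsigned int cpu = 0;
		bool same_owner;

		rcu_read_lock();
		same_owner = __mutex_owner(lock) == owner;
		if (same_owner) {
			ret = owner->on_cpu;	/* owner cannot be freed here */
			if (ret)
				cpu = task_cpu(owner);
		}
		rcu_read_unlock();

		if (!ret || !same_owner)
			break;

		if (need_resched() || vcpu_is_preempted(cpu)) {
			ret = false;
			break;
		}

		cpu_relax();
	}

	return ret;
}
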
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 350861f0da16..8917e6ceb3d0 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -400,32 +400,36 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
{
struct task_struct *owner = READ_ONCE(sem->owner);
- if (!is_rwsem_owner_spinnable(owner))
+ if (!owner || !is_rwsem_owner_spinnable(owner))
return false;
- rcu_read_lock();
- while (owner && (READ_ONCE(sem->owner) == owner)) {
+ while (true) {
+ bool on_cpu, same_owner;
+
/*
- * Ensure we emit the owner->on_cpu, dereference _after_
- * checking sem->owner still matches owner, if that fails,
+ * Ensure sem->owner still matches owner. If that fails,
* owner might point to free()d memory, if it still matches,
* the rcu_read_lock() ensures the memory stays valid.
*/
- barrier();
+ rcu_read_lock();
+ same_owner = sem->owner == owner;
+ if (same_owner)
+ on_cpu = owner->on_cpu &&
+ !vcpu_is_preempted(task_cpu(owner));
+ rcu_read_unlock();
+
+ if (!same_owner)
+ break;
/*
* abort spinning when need_resched or owner is not running or
* owner's cpu is preempted.
*/
- if (!owner->on_cpu || need_resched() ||
- vcpu_is_preempted(task_cpu(owner))) {
- rcu_read_unlock();
+ if (!on_cpu || need_resched())
return false;
- }
cpu_relax();
}
- rcu_read_unlock();
/*
* If there is a new owner or the owner is not set, we continue
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index e9adba01c456..5bd0fbecc37b 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o
obj-$(CONFIG_SCHED_TUNE) += tune.o
+obj-$(CONFIG_SCHED_TUNE_DUMMY) += tune_dummy.o
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2e3c650573bb..5f832ae982e6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3544,6 +3544,50 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
prepare_arch_switch(next);
}
+void release_task_stack(struct task_struct *tsk);
+static void task_async_free(struct work_struct *work)
+{
+ struct task_struct *t = container_of(work, typeof(*t), async_free.work);
+ bool free_stack = READ_ONCE(t->async_free.free_stack);
+
+ atomic_set(&t->async_free.running, 0);
+
+ if (free_stack) {
+ release_task_stack(t);
+ put_task_struct(t);
+ } else {
+ __put_task_struct(t);
+ }
+}
+
+static void finish_task_switch_dead(struct task_struct *prev)
+{
+ if (atomic_cmpxchg(&prev->async_free.running, 0, 1)) {
+ put_task_stack(prev);
+ put_task_struct(prev);
+ return;
+ }
+
+ if (atomic_dec_and_test(&prev->stack_refcount)) {
+ prev->async_free.free_stack = true;
+ } else if (atomic_dec_and_test(&prev->usage)) {
+ prev->async_free.free_stack = false;
+ } else {
+ atomic_set(&prev->async_free.running, 0);
+ return;
+ }
+
+ INIT_WORK(&prev->async_free.work, task_async_free);
+ queue_work(system_unbound_wq, &prev->async_free.work);
+}
+
+static void mmdrop_async_free(struct work_struct *work)
+{
+ struct mm_struct *mm = container_of(work, typeof(*mm), async_put_work);
+
+ __mmdrop(mm);
+}
+
/**
* finish_task_switch - clean up after a task-switch
* @prev: the thread we just switched away from.
@@ -3617,8 +3661,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
kcov_finish_switch(current);
fire_sched_in_preempt_notifiers(current);
- if (mm)
- mmdrop(mm);
+ if (mm && atomic_dec_and_test(&mm->mm_count)) {
+ INIT_WORK(&mm->async_put_work, mmdrop_async_free);
+ queue_work(system_unbound_wq, &mm->async_put_work);
+ }
if (unlikely(prev_state == TASK_DEAD)) {
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
@@ -3629,11 +3675,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
*/
kprobe_flush_task(prev);
- /* Task is done with its stack. */
- put_task_stack(prev);
-
- put_task_struct(prev);
-
+ finish_task_switch_dead(prev);
}
tick_nohz_task_switch();
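
The hunks above move the heavyweight cleanup of a dead task (stack free, final put_task_struct(), final mmdrop()) off the context-switch fast path and onto system_unbound_wq: finish_task_switch_dead() drops the references that finish_task_switch() used to drop directly, and queues a worker only when one of them actually hit zero. The code dereferences prev->async_free.{work,running,free_stack}, which is not declared anywhere in this kernel/ diff, so the companion task_struct change is assumed to look roughly like the sketch below (field names inferred from the hunks; the real declaration may differ):

/*
 * Assumed companion change, NOT part of this diff: the fields that
 * finish_task_switch_dead() and task_async_free() dereference.
 */
struct task_struct {
	/* ... existing fields ... */
	struct {
		struct work_struct work;   /* queued on system_unbound_wq     */
		atomic_t running;          /* single-shot guard for queueing  */
		bool free_stack;           /* true: free stack, then drop the
					    * task reference; false: free the
					    * whole task_struct              */
	} async_free;
	/* ... */
};
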
@@ -5002,7 +5044,8 @@ static void __setscheduler_params(struct task_struct *p,
if (policy == SETPARAM_POLICY)
policy = p->policy;
- p->policy = policy;
+ /* Replace SCHED_FIFO with SCHED_RR to reduce latency */
+ p->policy = policy == SCHED_FIFO ? SCHED_RR : policy;
if (dl_policy(policy))
__setparam_dl(p, attr);
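
The __setscheduler_params() hunk silently downgrades every SCHED_FIFO request to SCHED_RR, so realtime tasks of equal priority still round-robin instead of monopolizing a CPU. A small userspace check (hypothetical test program, not part of the patch) would make the remap visible: on a stock kernel it prints 1 (SCHED_FIFO), on a kernel carrying this hunk it should print 2 (SCHED_RR).

#include <sched.h>
#include <stdio.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 10 };

	/* Needs root or CAP_SYS_NICE. */
	if (sched_setscheduler(0, SCHED_FIFO, &sp)) {
		perror("sched_setscheduler");
		return 1;
	}

	printf("policy after requesting SCHED_FIFO: %d\n",
	       sched_getscheduler(0));
	return 0;
}
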
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index dbc51442ecbc..c2129347f793 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -8,6 +8,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#include <linux/cpufreq.h>
#include "sched.h"
@@ -61,3 +62,19 @@ void cpufreq_remove_update_util_hook(int cpu)
rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), NULL);
}
EXPORT_SYMBOL_GPL(cpufreq_remove_update_util_hook);
+
+/**
+ * cpufreq_can_do_remote_dvfs - Check if @policy can be updated from this CPU.
+ * @policy: cpufreq policy to check.
+ *
+ * Return 'true' if:
+ * - the local and remote CPUs share @policy, or
+ * - dvfs_possible_from_any_cpu is set in @policy and the local CPU is not going
+ * offline (in which case it is not expected to run cpufreq updates any more).
+ */
+bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy)
+{
+ return cpumask_test_cpu(smp_processor_id(), policy->cpus) ||
+ (policy->dvfs_possible_from_any_cpu &&
+ rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)));
+}
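
The second test relies on the per-CPU cpufreq_update_util_data pointer defined in this file: cpufreq_remove_update_util_hook() clears it when a CPU stops participating in cpufreq updates (for example on its way offline), so a NULL hook disables the dvfs_possible_from_any_cpu path for that CPU. A minimal kernel-context sketch of the intended caller pattern (function name illustrative; the real user is the schedutil hunk below):

/*
 * Bail out early when this CPU may neither update @policy directly nor
 * kick a remote update for it.
 */
static bool sketch_may_update(struct cpufreq_policy *policy)
{
	if (!cpufreq_can_do_remote_dvfs(policy))
		return false;	/* foreign CPU, or local hook already removed */

	return true;
}
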
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index d92acbfece89..83b58e7fa864 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -108,12 +108,10 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
* by the hardware, as calculating the frequency is pointless if
* we cannot in fact act on it.
*
- * For the slow switching platforms, the kthread is always scheduled on
- * the right set of CPUs and any CPU can find the next frequency and
- * schedule the kthread.
+ * This is needed on slow-switching platforms too, to prevent CPUs that
+ * are going offline from leaving stale IRQ work items behind.
*/
- if (sg_policy->policy->fast_switch_enabled &&
- !cpufreq_can_do_remote_dvfs(sg_policy->policy))
+ if (!cpufreq_can_do_remote_dvfs(sg_policy->policy))
return false;
if (unlikely(sg_policy->need_freq_update)) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f676735e500a..0b521c46a86e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7822,6 +7822,13 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu,
continue;
/*
+ * Skip searching for an active CPU for tasks that have
+ * high priority and prefer_high_cap set.
+ */
+ if (prefer_high_cap && p->prio <= DEFAULT_PRIO)
+ continue;
+
+ /*
* Case A.2: Target ACTIVE CPU
* Favor CPUs with max spare capacity.
*/
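
In kernel priority terms, a lower p->prio means a higher priority and DEFAULT_PRIO corresponds to nice 0, so the new bail-out skips the active-CPU search for prefer_high_cap tasks running at or above default priority (nice <= 0 for fair tasks). For reference, the mapping as in include/linux/sched/prio.h (values simplified; the real header derives them from MAX_NICE/MIN_NICE):

#define MAX_RT_PRIO		100	/* prio 0..99 are realtime       */
#define NICE_WIDTH		40	/* nice -20..19                  */
#define DEFAULT_PRIO		(MAX_RT_PRIO + NICE_WIDTH / 2)	/* 120, nice 0 */
#define NICE_TO_PRIO(nice)	((nice) + DEFAULT_PRIO)

/* The hunk above therefore skips the candidate when:
 *	prefer_high_cap && p->prio <= 120	(i.e. nice <= 0)
 */
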
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index c8f70ea89099..63b871ddce22 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -196,7 +196,7 @@ static void cpuidle_idle_call(void)
*/
next_state = cpuidle_select(drv, dev, &stop_tick);
- if (stop_tick || tick_nohz_tick_stopped())
+ if (stop_tick)
tick_nohz_idle_stop_tick();
else
tick_nohz_idle_retain_tick();
@@ -239,7 +239,6 @@ static void do_idle(void)
*/
__current_set_polling();
- quiet_vmstat();
tick_nohz_idle_enter();
while (!need_resched()) {
diff --git a/kernel/sched/tune_dummy.c b/kernel/sched/tune_dummy.c
new file mode 100644
index 000000000000..271e7d9cc2f3
--- /dev/null
+++ b/kernel/sched/tune_dummy.c
@@ -0,0 +1,122 @@
+#include <linux/cgroup.h>
+#include "sched.h"
+
+#define BOOSTGROUPS_COUNT 5
+
+struct schedtune {
+ /* SchedTune CGroup subsystem */
+ struct cgroup_subsys_state css;
+
+ /* Boost value for tasks on that SchedTune CGroup */
+ int boost;
+
+ /* Hint to bias scheduling of tasks on that SchedTune CGroup
+ * towards idle CPUs */
+ int prefer_idle;
+};
+
+static struct schedtune
+root_schedtune = {
+ .boost = 0,
+ .prefer_idle = 0,
+};
+
+static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
+ &root_schedtune,
+ NULL,
+};
+
+static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
+{
+ return container_of(css, struct schedtune, css);
+}
+
+static u64
+prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ return 0;
+}
+
+static int
+prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft,
+ u64 prefer_idle)
+{
+ return 0;
+}
+
+static s64
+boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ return 0;
+}
+
+static int
+boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
+ s64 boost)
+{
+ return 0;
+}
+
+static struct cftype files[] = {
+ {
+ .name = "boost",
+ .read_s64 = boost_read,
+ .write_s64 = boost_write,
+ },
+ {
+ .name = "prefer_idle",
+ .read_u64 = prefer_idle_read,
+ .write_u64 = prefer_idle_write,
+ },
+ { } /* terminate */
+};
+
+static struct cgroup_subsys_state *
+schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+ struct schedtune *st;
+ int idx;
+
+ if (!parent_css)
+ return &root_schedtune.css;
+
+ /* Allow only single-level hierarchies */
+ if (parent_css != &root_schedtune.css) {
+ pr_err("Nested SchedTune boosting groups not allowed\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Allow only a limited number of boosting groups */
+ for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx)
+ if (!allocated_group[idx])
+ break;
+ if (idx == BOOSTGROUPS_COUNT) {
+ pr_err("Trying to create more than %d SchedTune boosting groups\n",
+ BOOSTGROUPS_COUNT);
+ return ERR_PTR(-ENOSPC);
+ }
+
+ st = kzalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto out;
+
+ return &st->css;
+
+out:
+ return ERR_PTR(-ENOMEM);
+}
+
+static void
+schedtune_css_free(struct cgroup_subsys_state *css)
+{
+ struct schedtune *st = css_st(css);
+
+ kfree(st);
+}
+
+struct cgroup_subsys schedtune_cgrp_subsys = {
+ .css_alloc = schedtune_css_alloc,
+ .css_free = schedtune_css_free,
+ .legacy_cftypes = files,
+ .early_init = 1,
+};
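
tune_dummy.c exists purely for userspace compatibility when the real SchedTune controller is compiled out: the schedtune cgroup hierarchy and its boost/prefer_idle attributes stay present, writes succeed but are discarded, and reads always return 0. A userspace sketch of the expected behaviour (the /dev/stune path is Android's conventional schedtune mount point, assumed here; this patch does not mount anything itself):

#include <stdio.h>

/* Hypothetical path, assuming the usual Android schedtune mount. */
static const char *boost_file = "/dev/stune/top-app/schedtune.boost";

int main(void)
{
	FILE *f = fopen(boost_file, "w");
	char buf[32] = "";

	if (!f) { perror("open for write"); return 1; }
	/* With tune_dummy.c the write is accepted and silently dropped. */
	fprintf(f, "10\n");
	fclose(f);

	f = fopen(boost_file, "r");
	if (!f) { perror("open for read"); return 1; }
	/* ...and the value always reads back as 0. */
	if (fgets(buf, sizeof(buf), f))
		printf("schedtune.boost = %s", buf);
	fclose(f);
	return 0;
}
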
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d8a2ae86ebd3..e9c7494ed8d9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -29,6 +29,7 @@
#include <linux/timer.h>
#include <linux/context_tracking.h>
#include <linux/rq_stats.h>
+#include <linux/mm.h>
#include <asm/irq_regs.h>
@@ -823,6 +824,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
if (!ts->tick_stopped) {
calc_load_nohz_start();
cpu_load_update_nohz_start();
+ quiet_vmstat();
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
ts->tick_stopped = 1;