author     Kaan Külahlı <kaankulahli@gmail.com>  2017-05-25 19:54:16 +0300
committer  GitHub <noreply@github.com>           2017-05-25 19:54:16 +0300
commit     943acc22babac805296a8d6de42f424f8c00df5a (patch)
tree       b710cb20d9b8c1e50a7705564ed08a968eb4c163 /kernel
parent     6f93f25b482cb3833bad393a2f4cde8a0d1a69ca (diff)
parent     14f0a97da192d0cf20ae234389925827d9c2c119 (diff)
Merge pull request #3 from kernel-hut/cm-14.1 (HEAD, n7.1)
pull request
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched/Makefile       1
-rw-r--r--  kernel/sched/core_ctl.c  1115
2 files changed, 1116 insertions, 0 deletions
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 1c7bde63c5e..53ccd2f5d41 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
+obj-$(CONFIG_SCHED_CORE_CTL) += core_ctl.o
diff --git a/kernel/sched/core_ctl.c b/kernel/sched/core_ctl.c
new file mode 100644
index 00000000000..a1134c86d27
--- /dev/null
+++ b/kernel/sched/core_ctl.c
@@ -0,0 +1,1115 @@
+/* Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
+#include <linux/timer.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
+#include <linux/mutex.h>
+
+#include <trace/events/power.h>
+
+#define MAX_CPUS_PER_GROUP 4
+
+struct cpu_data {
+ /* Per CPU data. */
+ bool inited;
+ bool online;
+ bool rejected;
+ bool is_busy;
+ bool not_preferred;
+ unsigned int busy;
+ unsigned int cpu;
+ struct list_head sib;
+ unsigned int first_cpu;
+ struct list_head pending_sib;
+
+ /* Per cluster data set only on first CPU */
+ unsigned int min_cpus;
+ unsigned int max_cpus;
+ unsigned int offline_delay_ms;
+ unsigned int busy_up_thres[MAX_CPUS_PER_GROUP];
+ unsigned int busy_down_thres[MAX_CPUS_PER_GROUP];
+ unsigned int online_cpus;
+ unsigned int avail_cpus;
+ unsigned int num_cpus;
+ unsigned int need_cpus;
+ unsigned int task_thres;
+ s64 need_ts;
+ struct list_head lru;
+ bool pending;
+ spinlock_t pending_lock;
+ bool is_big_cluster;
+ int nrrun;
+ bool nrrun_changed;
+ struct timer_list timer;
+ struct task_struct *hotplug_thread;
+ struct kobject kobj;
+ struct list_head pending_lru;
+ bool disabled;
+};
+
+static DEFINE_PER_CPU(struct cpu_data, cpu_state);
+static DEFINE_SPINLOCK(state_lock);
+static DEFINE_SPINLOCK(pending_lru_lock);
+static DEFINE_MUTEX(lru_lock);
+
+static void apply_need(struct cpu_data *f);
+static void wake_up_hotplug_thread(struct cpu_data *state);
+static void add_to_pending_lru(struct cpu_data *state);
+static void update_lru(struct cpu_data *state);
+
+/* ========================= sysfs interface =========================== */
+
+static ssize_t store_min_cpus(struct cpu_data *state,
+ const char *buf, size_t count)
+{
+ unsigned int val;
+
+ if (sscanf(buf, "%u\n", &val) != 1)
+ return -EINVAL;
+
+ state->min_cpus = min(val, state->max_cpus);
+ wake_up_hotplug_thread(state);
+
+ return count;
+}
+
+static ssize_t show_min_cpus(struct cpu_data *state, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", state->min_cpus);
+}
+
+static ssize_t store_max_cpus(struct cpu_data *state,
+ const char *buf, size_t count)
+{
+ unsigned int val;
+
+ if (sscanf(buf, "%u\n", &val) != 1)
+ return -EINVAL;
+
+ val = min(val, state->num_cpus);
+ state->max_cpus = val;
+ state->min_cpus = min(state->min_cpus, state->max_cpus);
+ wake_up_hotplug_thread(state);
+
+ return count;
+}
+
+static ssize_t show_max_cpus(struct cpu_data *state, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", state->max_cpus);
+}
+
+static ssize_t store_offline_delay_ms(struct cpu_data *state,
+ const char *buf, size_t count)
+{
+ unsigned int val;
+
+ if (sscanf(buf, "%u\n", &val) != 1)
+ return -EINVAL;
+
+ state->offline_delay_ms = val;
+ apply_need(state);
+
+ return count;
+}
+
+static ssize_t show_task_thres(struct cpu_data *state, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", state->task_thres);
+}
+
+static ssize_t store_task_thres(struct cpu_data *state,
+ const char *buf, size_t count)
+{
+ unsigned int val;
+
+ if (sscanf(buf, "%u\n", &val) != 1)
+ return -EINVAL;
+
+ if (val < state->num_cpus)
+ return -EINVAL;
+
+ state->task_thres = val;
+ apply_need(state);
+
+ return count;
+}
+
+static ssize_t show_offline_delay_ms(struct cpu_data *state, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", state->offline_delay_ms);
+}
+
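+/*
+ * The busy threshold stores below accept either a single value, applied
+ * to every CPU in the group, or one value per CPU, e.g. "60" or
+ * "40 50 60 70" (example values only).
+ */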
+static ssize_t store_busy_up_thres(struct cpu_data *state,
+ const char *buf, size_t count)
+{
+ unsigned int val[MAX_CPUS_PER_GROUP];
+ int ret, i;
+
+ ret = sscanf(buf, "%u %u %u %u\n", &val[0], &val[1], &val[2], &val[3]);
+ if (ret != 1 && ret != state->num_cpus)
+ return -EINVAL;
+
+ if (ret == 1) {
+ for (i = 0; i < state->num_cpus; i++)
+ state->busy_up_thres[i] = val[0];
+ } else {
+ for (i = 0; i < state->num_cpus; i++)
+ state->busy_up_thres[i] = val[i];
+ }
+ apply_need(state);
+ return count;
+}
+
+static ssize_t show_busy_up_thres(struct cpu_data *state, char *buf)
+{
+ int i, count = 0;
+
+ for (i = 0; i < state->num_cpus; i++)
+ count += snprintf(buf + count, PAGE_SIZE - count, "%u ",
+ state->busy_up_thres[i]);
+ count += snprintf(buf + count, PAGE_SIZE - count, "\n");
+ return count;
+}
+
+static ssize_t store_busy_down_thres(struct cpu_data *state,
+ const char *buf, size_t count)
+{
+ unsigned int val[MAX_CPUS_PER_GROUP];
+ int ret, i;
+
+ ret = sscanf(buf, "%u %u %u %u\n", &val[0], &val[1], &val[2], &val[3]);
+ if (ret != 1 && ret != state->num_cpus)
+ return -EINVAL;
+
+ if (ret == 1) {
+ for (i = 0; i < state->num_cpus; i++)
+ state->busy_down_thres[i] = val[0];
+ } else {
+ for (i = 0; i < state->num_cpus; i++)
+ state->busy_down_thres[i] = val[i];
+ }
+ apply_need(state);
+ return count;
+}
+
+static ssize_t show_busy_down_thres(struct cpu_data *state, char *buf)
+{
+ int i, count = 0;
+
+ for (i = 0; i < state->num_cpus; i++)
+ count += snprintf(buf + count, PAGE_SIZE - count, "%u ",
+ state->busy_down_thres[i]);
+ count += snprintf(buf + count, PAGE_SIZE - count, "\n");
+ return count;
+}
+
+static ssize_t store_is_big_cluster(struct cpu_data *state,
+ const char *buf, size_t count)
+{
+ unsigned int val;
+
+ if (sscanf(buf, "%u\n", &val) != 1)
+ return -EINVAL;
+
+ state->is_big_cluster = val ? 1 : 0;
+ return count;
+}
+
+static ssize_t show_is_big_cluster(struct cpu_data *state, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", state->is_big_cluster);
+}
+
+static ssize_t show_cpus(struct cpu_data *state, char *buf)
+{
+ struct cpu_data *c;
+ ssize_t count = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&state_lock, flags);
+ list_for_each_entry(c, &state->lru, sib) {
+ count += snprintf(buf + count, PAGE_SIZE - count,
+ "CPU%u (%s)\n", c->cpu,
+ c->online ? "Online" : "Offline");
+ }
+ spin_unlock_irqrestore(&state_lock, flags);
+ return count;
+}
+
+static ssize_t show_need_cpus(struct cpu_data *state, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", state->need_cpus);
+}
+
+static ssize_t show_online_cpus(struct cpu_data *state, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", state->online_cpus);
+}
+
+static ssize_t show_global_state(struct cpu_data *state, char *buf)
+{
+ struct cpu_data *c;
+ ssize_t count = 0;
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ count += snprintf(buf + count, PAGE_SIZE - count,
+ "CPU%u\n", cpu);
+ c = &per_cpu(cpu_state, cpu);
+ if (!c->inited)
+ continue;
+ count += snprintf(buf + count, PAGE_SIZE - count,
+ "\tCPU: %u\n", c->cpu);
+ count += snprintf(buf + count, PAGE_SIZE - count,
+ "\tOnline: %u\n", c->online);
+ count += snprintf(buf + count, PAGE_SIZE - count,
+ "\tRejected: %u\n", c->rejected);
+ count += snprintf(buf + count, PAGE_SIZE - count,
+ "\tFirst CPU: %u\n", c->first_cpu);
+ count += snprintf(buf + count, PAGE_SIZE - count,
+ "\tBusy%%: %u\n", c->busy);
+ count += snprintf(buf + count, PAGE_SIZE - count,
+ "\tIs busy: %u\n", c->is_busy);
+ if (c->cpu != c->first_cpu)
+ continue;
+ count += snprintf(buf + count, PAGE_SIZE - count,
+ "\tNr running: %u\n", c->nrrun);
+ count += snprintf(buf + count, PAGE_SIZE - count,
+ "\tAvail CPUs: %u\n", c->avail_cpus);
+ count += snprintf(buf + count, PAGE_SIZE - count,
+ "\tNeed CPUs: %u\n", c->need_cpus);
+ count += snprintf(buf + count, PAGE_SIZE - count,
+ "\tStatus: %s\n",
+ c->disabled ? "disabled" : "enabled");
+ }
+
+ return count;
+}
+
+static ssize_t store_not_preferred(struct cpu_data *state,
+ const char *buf, size_t count)
+{
+ struct cpu_data *c;
+ unsigned int i, first_cpu;
+ unsigned int val[MAX_CPUS_PER_GROUP];
+ int ret;
+
+ ret = sscanf(buf, "%u %u %u %u\n", &val[0], &val[1], &val[2], &val[3]);
+ if (ret != 1 && ret != state->num_cpus)
+ return -EINVAL;
+
+ first_cpu = state->first_cpu;
+
+ for (i = 0; i < state->num_cpus; i++) {
+ c = &per_cpu(cpu_state, first_cpu);
+ /* As with the threshold stores, a single value applies to all CPUs. */
+ c->not_preferred = ret == 1 ? val[0] : val[i];
+ first_cpu++;
+ }
+
+ return count;
+}
+
+static ssize_t show_not_preferred(struct cpu_data *state, char *buf)
+{
+ struct cpu_data *c;
+ ssize_t count = 0;
+ unsigned int i, first_cpu;
+
+ first_cpu = state->first_cpu;
+
+ for (i = 0; i < state->num_cpus; i++) {
+ c = &per_cpu(cpu_state, first_cpu);
+ count += snprintf(buf + count, PAGE_SIZE - count,
+ "\tCPU:%d %u\n", first_cpu, c->not_preferred);
+ first_cpu++;
+ }
+
+ return count;
+}
+
+static ssize_t store_disable(struct cpu_data *state,
+ const char *buf, size_t count)
+{
+ unsigned int val;
+
+ if (sscanf(buf, "%u\n", &val) != 1)
+ return -EINVAL;
+
+ val = !!val;
+
+ if (state->disabled == val)
+ return count;
+
+ state->disabled = val;
+
+ if (!state->disabled)
+ wake_up_hotplug_thread(state);
+
+ return count;
+}
+
+static ssize_t show_disable(struct cpu_data *state, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", state->disabled);
+}
+
+struct core_ctl_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct cpu_data *, char *);
+ ssize_t (*store)(struct cpu_data *, const char *, size_t count);
+};
+
+#define core_ctl_attr_ro(_name) \
+static struct core_ctl_attr _name = \
+__ATTR(_name, 0444, show_##_name, NULL)
+
+#define core_ctl_attr_rw(_name) \
+static struct core_ctl_attr _name = \
+__ATTR(_name, 0644, show_##_name, store_##_name)
+
+core_ctl_attr_rw(min_cpus);
+core_ctl_attr_rw(max_cpus);
+core_ctl_attr_rw(offline_delay_ms);
+core_ctl_attr_rw(busy_up_thres);
+core_ctl_attr_rw(busy_down_thres);
+core_ctl_attr_rw(task_thres);
+core_ctl_attr_rw(is_big_cluster);
+core_ctl_attr_ro(cpus);
+core_ctl_attr_ro(need_cpus);
+core_ctl_attr_ro(online_cpus);
+core_ctl_attr_ro(global_state);
+core_ctl_attr_rw(not_preferred);
+core_ctl_attr_rw(disable);
+
+static struct attribute *default_attrs[] = {
+ &min_cpus.attr,
+ &max_cpus.attr,
+ &offline_delay_ms.attr,
+ &busy_up_thres.attr,
+ &busy_down_thres.attr,
+ &task_thres.attr,
+ &is_big_cluster.attr,
+ &cpus.attr,
+ &need_cpus.attr,
+ &online_cpus.attr,
+ &global_state.attr,
+ &not_preferred.attr,
+ &disable.attr,
+ NULL
+};
+
+#define to_cpu_data(k) container_of(k, struct cpu_data, kobj)
+#define to_attr(a) container_of(a, struct core_ctl_attr, attr)
+static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct cpu_data *data = to_cpu_data(kobj);
+ struct core_ctl_attr *cattr = to_attr(attr);
+ ssize_t ret = -EIO;
+
+ if (cattr->show)
+ ret = cattr->show(data, buf);
+
+ return ret;
+}
+
+static ssize_t store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cpu_data *data = to_cpu_data(kobj);
+ struct core_ctl_attr *cattr = to_attr(attr);
+ ssize_t ret = -EIO;
+
+ if (cattr->store)
+ ret = cattr->store(data, buf, count);
+
+ return ret;
+}
+
+static const struct sysfs_ops sysfs_ops = {
+ .show = show,
+ .store = store,
+};
+
+static struct kobj_type ktype_core_ctl = {
+ .sysfs_ops = &sysfs_ops,
+ .default_attrs = default_attrs,
+};
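+
+/*
+ * Each group's tunables appear in sysfs under the group's first CPU,
+ * i.e. /sys/devices/system/cpu/cpu<first_cpu>/core_ctl/; the kobject is
+ * added to that CPU device's kobject in group_init() below.
+ */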
+
+/* ==================== runqueue based core count =================== */
+
+#define RQ_AVG_TOLERANCE 2
+#define RQ_AVG_DEFAULT_MS 20
+#define NR_RUNNING_TOLERANCE 5
+static unsigned int rq_avg_period_ms = RQ_AVG_DEFAULT_MS;
+
+static s64 rq_avg_timestamp_ms;
+static struct timer_list rq_avg_timer;
+
+static void update_running_avg(bool trigger_update)
+{
+ int cpu;
+ struct cpu_data *pcpu;
+ int avg, iowait_avg, big_avg, old_nrrun;
+ s64 now;
+ unsigned long flags;
+
+ spin_lock_irqsave(&state_lock, flags);
+
+ now = ktime_to_ms(ktime_get());
+ if (now - rq_avg_timestamp_ms < rq_avg_period_ms - RQ_AVG_TOLERANCE) {
+ spin_unlock_irqrestore(&state_lock, flags);
+ return;
+ }
+ rq_avg_timestamp_ms = now;
+ sched_get_nr_running_avg(&avg, &iowait_avg, &big_avg);
+
+ spin_unlock_irqrestore(&state_lock, flags);
+
+ /*
+ * Round up to the next integer only if the average number of
+ * running tasks is within NR_RUNNING_TOLERANCE/100 of the next
+ * integer. If normal rounding up were used, a transient task
+ * could trigger an online event; by the time the core is onlined,
+ * the task has already finished.
+ * Rounding to the closest integer suffers from the same problem,
+ * because the scheduler may only provide running stats once per
+ * jiffy, and a transient task could skew the number for one jiffy.
+ * If core control samples every 2 jiffies, it would observe an
+ * additional 0.5 in the running average, which rounds up to 1 task.
+ */
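+ /*
+ * Worked example, with the averages scaled by 100 as the division
+ * below assumes: avg = 195 (1.95 tasks) gives (195 + 5) / 100 = 2,
+ * so it rounds up, while avg = 150 (1.50 tasks) gives
+ * (150 + 5) / 100 = 1. Only values within 0.05 of the next integer
+ * are rounded up.
+ */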
+ avg = (avg + NR_RUNNING_TOLERANCE) / 100;
+ big_avg = (big_avg + NR_RUNNING_TOLERANCE) / 100;
+
+ for_each_possible_cpu(cpu) {
+ pcpu = &per_cpu(cpu_state, cpu);
+ if (!pcpu->inited || pcpu->first_cpu != cpu)
+ continue;
+ old_nrrun = pcpu->nrrun;
+ /*
+ * The big cluster only needs to take care of big tasks, but if
+ * there are not enough big cores, big tasks may need to run on
+ * the little cluster as well. Thus the little cluster's runqueue
+ * stat has to either use the overall runqueue average, or derive
+ * which big tasks would have to run on little cores. The latter
+ * is hard to do, given that core control reacts much more slowly
+ * than the scheduler and cannot predict its behavior.
+ */
+ pcpu->nrrun = pcpu->is_big_cluster ? big_avg : avg;
+ if (pcpu->nrrun != old_nrrun) {
+ if (trigger_update)
+ apply_need(pcpu);
+ else
+ pcpu->nrrun_changed = true;
+ }
+ }
+}
+
+/* adjust needed CPUs based on current runqueue information */
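+/*
+ * A small illustration of the intent (values hypothetical): with two
+ * busy CPUs (new_need = 2) and an average of 3 running tasks, one extra
+ * CPU is requested (3); once nrrun reaches task_thres, all num_cpus are
+ * requested regardless of busyness.
+ */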
+static unsigned int apply_task_need(struct cpu_data *f, unsigned int new_need)
+{
+ /* Online all cores if there are enough tasks */
+ if (f->nrrun >= f->task_thres)
+ return f->num_cpus;
+
+ /* only online more cores if there are tasks to run */
+ if (f->nrrun > new_need)
+ return new_need + 1;
+
+ return new_need;
+}
+
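+/*
+ * Example of the alignment this gives, assuming HZ = 100: the 20 ms
+ * default period is 2 jiffies, so at jiffies = 7 the next window starts
+ * at jiffy 8, keeping the rq_avg timer aligned to period boundaries
+ * rather than simply firing at jiffies + period.
+ */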
+static u64 round_to_nw_start(void)
+{
+ unsigned long step = msecs_to_jiffies(rq_avg_period_ms);
+ u64 jif = get_jiffies_64();
+
+ do_div(jif, step);
+ return (jif + 1) * step;
+}
+
+static void rq_avg_timer_func(unsigned long not_used)
+{
+ update_running_avg(true);
+ mod_timer(&rq_avg_timer, round_to_nw_start());
+}
+
+/* ======================= load based core count ====================== */
+
+static unsigned int apply_limits(struct cpu_data *f, unsigned int need_cpus)
+{
+ return min(max(f->min_cpus, need_cpus), f->max_cpus);
+}
+
+static bool eval_need(struct cpu_data *f)
+{
+ unsigned long flags;
+ struct cpu_data *c;
+ unsigned int need_cpus = 0, last_need, thres_idx;
+ int ret = 0;
+ bool need_flag = false;
+ s64 now;
+
+ if (unlikely(!f->inited))
+ return 0;
+
+ spin_lock_irqsave(&state_lock, flags);
+ thres_idx = f->online_cpus ? f->online_cpus - 1 : 0;
+ list_for_each_entry(c, &f->lru, sib) {
+ if (c->busy >= f->busy_up_thres[thres_idx])
+ c->is_busy = true;
+ else if (c->busy < f->busy_down_thres[thres_idx])
+ c->is_busy = false;
+ need_cpus += c->is_busy;
+ }
+ need_cpus = apply_task_need(f, need_cpus);
+ need_flag = apply_limits(f, need_cpus) != apply_limits(f, f->need_cpus);
+ last_need = f->need_cpus;
+
+ now = ktime_to_ms(ktime_get());
+
+ if (need_cpus == last_need) {
+ f->need_ts = now;
+ spin_unlock_irqrestore(&state_lock, flags);
+ return 0;
+ }
+
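+ /*
+ * An increase in the needed CPU count is applied immediately; a
+ * decrease only takes effect once the lower need has persisted for
+ * offline_delay_ms, otherwise the per-group timer re-evaluates the
+ * need later.
+ */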
+ if (need_cpus > last_need) {
+ ret = 1;
+ } else if (need_cpus < last_need) {
+ s64 elapsed = now - f->need_ts;
+
+ if (elapsed >= f->offline_delay_ms) {
+ ret = 1;
+ } else {
+ mod_timer(&f->timer, jiffies +
+ msecs_to_jiffies(f->offline_delay_ms));
+ }
+ }
+
+ if (ret) {
+ f->need_ts = now;
+ f->need_cpus = need_cpus;
+ }
+
+ trace_core_ctl_eval_need(f->cpu, last_need, need_cpus,
+ ret && need_flag);
+ spin_unlock_irqrestore(&state_lock, flags);
+
+ return ret && need_flag;
+}
+
+static void apply_need(struct cpu_data *f)
+{
+ if (eval_need(f))
+ wake_up_hotplug_thread(f);
+}
+
+static int core_ctl_set_busy(unsigned int cpu, unsigned int busy)
+{
+ struct cpu_data *c = &per_cpu(cpu_state, cpu);
+ struct cpu_data *f;
+ unsigned int old_is_busy = c->is_busy;
+
+ if (!c->inited)
+ return 0;
+ f = &per_cpu(cpu_state, c->first_cpu);
+
+ update_running_avg(false);
+ if (c->busy == busy && !f->nrrun_changed)
+ return 0;
+ c->busy = busy;
+ f->nrrun_changed = false;
+
+ apply_need(f);
+ trace_core_ctl_set_busy(cpu, busy, old_is_busy, c->is_busy);
+ return 0;
+}
+
+/* ========================= core count enforcement ==================== */
+
+/*
+ * If the current thread is a hotplug thread, don't attempt to wake up
+ * itself or other hotplug threads, because that would deadlock.
+ * Instead, schedule a timer to fire on the next timer tick and wake up
+ * the thread from there.
+ */
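+/*
+ * This can happen, for instance, when the hotplug thread offlines a CPU
+ * from do_hotplug(): the CPU_DEAD notifier then runs in that thread's
+ * context and may call wake_up_hotplug_thread() again.
+ */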
+static void wake_up_hotplug_thread(struct cpu_data *state)
+{
+ unsigned long flags;
+ int cpu;
+ struct cpu_data *pcpu;
+ bool no_wakeup = false;
+
+ if (unlikely(state->disabled))
+ return;
+
+ for_each_possible_cpu(cpu) {
+ pcpu = &per_cpu(cpu_state, cpu);
+ if (cpu != pcpu->first_cpu)
+ continue;
+ if (pcpu->hotplug_thread == current) {
+ no_wakeup = true;
+ break;
+ }
+ }
+
+ spin_lock_irqsave(&state->pending_lock, flags);
+ state->pending = true;
+ spin_unlock_irqrestore(&state->pending_lock, flags);
+
+ if (no_wakeup) {
+ spin_lock_irqsave(&state_lock, flags);
+ mod_timer(&state->timer, jiffies);
+ spin_unlock_irqrestore(&state_lock, flags);
+ } else {
+ wake_up_process(state->hotplug_thread);
+ }
+}
+
+static void core_ctl_timer_func(unsigned long cpu)
+{
+ struct cpu_data *state = &per_cpu(cpu_state, cpu);
+ unsigned long flags;
+
+ if (eval_need(state) && !state->disabled) {
+ spin_lock_irqsave(&state->pending_lock, flags);
+ state->pending = true;
+ spin_unlock_irqrestore(&state->pending_lock, flags);
+ wake_up_process(state->hotplug_thread);
+ }
+
+}
+
+static int core_ctl_online_core(unsigned int cpu)
+{
+ int ret;
+ struct device *dev;
+
+ lock_device_hotplug();
+ dev = get_cpu_device(cpu);
+ if (!dev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__, cpu);
+ ret = -ENODEV;
+ } else {
+ ret = device_online(dev);
+ }
+ unlock_device_hotplug();
+ return ret;
+}
+
+static int core_ctl_offline_core(unsigned int cpu)
+{
+ int ret;
+ struct device *dev;
+
+ lock_device_hotplug();
+ dev = get_cpu_device(cpu);
+ if (!dev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__, cpu);
+ ret = -ENODEV;
+ } else {
+ ret = device_offline(dev);
+ }
+ unlock_device_hotplug();
+ return ret;
+}
+
+static void update_lru(struct cpu_data *f)
+{
+ struct cpu_data *c, *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pending_lru_lock, flags);
+ spin_lock(&state_lock);
+
+ list_for_each_entry_safe(c, tmp, &f->pending_lru, pending_sib) {
+ list_del_init(&c->pending_sib);
+ list_del(&c->sib);
+ list_add_tail(&c->sib, &f->lru);
+ }
+
+ spin_unlock(&state_lock);
+ spin_unlock_irqrestore(&pending_lru_lock, flags);
+}
+
+static void __ref do_hotplug(struct cpu_data *f)
+{
+ unsigned int need;
+ struct cpu_data *c, *tmp;
+
+ need = apply_limits(f, f->need_cpus);
+ pr_debug("Trying to adjust group %u to %u\n", f->first_cpu, need);
+
+ mutex_lock(&lru_lock);
+ if (f->online_cpus > need) {
+ list_for_each_entry_safe(c, tmp, &f->lru, sib) {
+ if (!c->online)
+ continue;
+
+ if (f->online_cpus == need)
+ break;
+
+ /* Don't offline busy CPUs. */
+ if (c->is_busy)
+ continue;
+
+ pr_debug("Trying to Offline CPU%u\n", c->cpu);
+ if (core_ctl_offline_core(c->cpu))
+ pr_debug("Unable to Offline CPU%u\n", c->cpu);
+ }
+
+ /*
+ * If the number of online CPUs is within the limits, then
+ * don't force any busy CPUs offline.
+ */
+ if (f->online_cpus <= f->max_cpus)
+ goto done;
+
+ list_for_each_entry_safe(c, tmp, &f->lru, sib) {
+ if (!c->online)
+ continue;
+
+ if (f->online_cpus <= f->max_cpus)
+ break;
+
+ pr_debug("Trying to Offline CPU%u\n", c->cpu);
+ if (core_ctl_offline_core(c->cpu))
+ pr_debug("Unable to Offline CPU%u\n", c->cpu);
+ }
+ } else if (f->online_cpus < need) {
+ list_for_each_entry_safe(c, tmp, &f->lru, sib) {
+ if (c->online || c->rejected || c->not_preferred)
+ continue;
+ if (f->online_cpus == need)
+ break;
+
+ pr_debug("Trying to Online CPU%u\n", c->cpu);
+ if (core_ctl_online_core(c->cpu))
+ pr_debug("Unable to Online CPU%u\n", c->cpu);
+ }
+
+ if (f->online_cpus == need)
+ goto done;
+
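+ /*
+ * Still below the target after the first pass: fall back to
+ * onlining CPUs that were marked not_preferred.
+ */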
+ list_for_each_entry_safe(c, tmp, &f->lru, sib) {
+ if (c->online || c->rejected || !c->not_preferred)
+ continue;
+ if (f->online_cpus == need)
+ break;
+
+ pr_debug("Trying to Online CPU%u\n", c->cpu);
+ if (core_ctl_online_core(c->cpu))
+ pr_debug("Unable to Online CPU%u\n", c->cpu);
+ }
+ }
+done:
+ mutex_unlock(&lru_lock);
+ update_lru(f);
+}
+
+static int __ref try_hotplug(void *data)
+{
+ struct cpu_data *f = data;
+ unsigned long flags;
+
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_lock_irqsave(&f->pending_lock, flags);
+ if (!f->pending) {
+ spin_unlock_irqrestore(&f->pending_lock, flags);
+ schedule();
+ if (kthread_should_stop())
+ break;
+ spin_lock_irqsave(&f->pending_lock, flags);
+ }
+ set_current_state(TASK_RUNNING);
+ f->pending = false;
+ spin_unlock_irqrestore(&f->pending_lock, flags);
+
+ do_hotplug(f);
+ }
+
+ return 0;
+}
+
+static void add_to_pending_lru(struct cpu_data *state)
+{
+ unsigned long flags;
+ struct cpu_data *f = &per_cpu(cpu_state, state->first_cpu);
+
+ spin_lock_irqsave(&pending_lru_lock, flags);
+
+ if (!list_empty(&state->pending_sib))
+ list_del(&state->pending_sib);
+ list_add_tail(&state->pending_sib, &f->pending_lru);
+
+ spin_unlock_irqrestore(&pending_lru_lock, flags);
+}
+
+static int __ref cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ uint32_t cpu = (uintptr_t)hcpu;
+ struct cpu_data *state = &per_cpu(cpu_state, cpu);
+ struct cpu_data *f;
+ int ret = NOTIFY_OK;
+ unsigned long flags;
+
+ /* Don't affect suspend/resume */
+ if (action & CPU_TASKS_FROZEN)
+ return NOTIFY_OK;
+
+ if (unlikely(!state->inited))
+ return NOTIFY_OK;
+
+ f = &per_cpu(cpu_state, state->first_cpu);
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+
+ /* If online state of CPU somehow got out of sync, fix it. */
+ if (state->online) {
+ f->online_cpus--;
+ state->online = false;
+ pr_warn("CPU%d offline when state is online\n", cpu);
+ }
+
+ if (state->rejected) {
+ state->rejected = false;
+ f->avail_cpus++;
+ }
+
+ /*
+ * If a CPU is in the process of coming up, mark it as online
+ * so that there's no race with the hotplug thread bringing up
+ * more CPUs than necessary.
+ */
+ if (!f->disabled &&
+ apply_limits(f, f->need_cpus) <= f->online_cpus) {
+ pr_debug("Prevent CPU%d onlining\n", cpu);
+ ret = NOTIFY_BAD;
+ } else {
+ state->online = true;
+ f->online_cpus++;
+ }
+ break;
+
+ case CPU_ONLINE:
+ /*
+ * Moving the CPU to the end of the list should only happen on
+ * CPU_ONLINE and not on CPU_UP_PREPARE, to prevent an infinite
+ * list traversal when thermal (or another entity) rejects
+ * attempts to online CPUs.
+ */
+ ret = mutex_trylock(&lru_lock);
+ if (ret) {
+ spin_lock_irqsave(&state_lock, flags);
+ list_del(&state->sib);
+ list_add_tail(&state->sib, &f->lru);
+ spin_unlock_irqrestore(&state_lock, flags);
+ mutex_unlock(&lru_lock);
+ } else {
+ /*
+ * lru_lock is held by our hotplug thread to prevent
+ * concurrent access to the lru list. The updates are
+ * queued on the pending_lru list and the lru list is
+ * updated at the end of do_hotplug().
+ */
+ add_to_pending_lru(state);
+ }
+ break;
+
+ case CPU_DEAD:
+ /* Move a CPU to the end of the LRU when it goes offline. */
+ ret = mutex_trylock(&lru_lock);
+ if (ret) {
+ spin_lock_irqsave(&state_lock, flags);
+ list_del(&state->sib);
+ list_add_tail(&state->sib, &f->lru);
+ spin_unlock_irqrestore(&state_lock, flags);
+ mutex_unlock(&lru_lock);
+ } else {
+ add_to_pending_lru(state);
+ }
+ /* Fall through */
+
+ case CPU_UP_CANCELED:
+
+ /* If online state of CPU somehow got out of sync, fix it. */
+ if (!state->online) {
+ f->online_cpus++;
+ pr_warn("CPU%d online when state is offline\n", cpu);
+ }
+
+ if (!state->rejected && action == CPU_UP_CANCELED) {
+ state->rejected = true;
+ f->avail_cpus--;
+ }
+
+ state->online = false;
+ state->busy = 0;
+ f->online_cpus--;
+ break;
+ }
+
+ if (f->online_cpus < apply_limits(f, f->need_cpus)
+ && f->online_cpus < f->avail_cpus
+ && action == CPU_DEAD)
+ wake_up_hotplug_thread(f);
+
+ return ret;
+}
+
+static struct notifier_block __refdata cpu_notifier = {
+ .notifier_call = cpu_callback,
+};
+
+/* ============================ init code ============================== */
+
+static int group_init(struct cpumask *mask)
+{
+ struct device *dev;
+ unsigned int first_cpu = cpumask_first(mask);
+ struct cpu_data *f = &per_cpu(cpu_state, first_cpu);
+ struct cpu_data *state;
+ unsigned int cpu;
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
+ if (likely(f->inited))
+ return 0;
+
+ dev = get_cpu_device(first_cpu);
+ if (!dev)
+ return -ENODEV;
+
+ pr_info("Creating CPU group %d\n", first_cpu);
+
+ f->num_cpus = cpumask_weight(mask);
+ if (f->num_cpus > MAX_CPUS_PER_GROUP) {
+ pr_err("HW configuration not supported\n");
+ return -EINVAL;
+ }
+ f->min_cpus = 1;
+ f->max_cpus = f->num_cpus;
+ f->need_cpus = f->num_cpus;
+ f->avail_cpus = f->num_cpus;
+ f->offline_delay_ms = 100;
+ f->task_thres = UINT_MAX;
+ f->nrrun = f->num_cpus;
+ INIT_LIST_HEAD(&f->lru);
+ INIT_LIST_HEAD(&f->pending_lru);
+ init_timer(&f->timer);
+ spin_lock_init(&f->pending_lock);
+ f->timer.function = core_ctl_timer_func;
+ f->timer.data = first_cpu;
+
+ for_each_cpu(cpu, mask) {
+ pr_info("Init CPU%u state\n", cpu);
+
+ state = &per_cpu(cpu_state, cpu);
+ state->cpu = cpu;
+ state->first_cpu = first_cpu;
+
+ if (cpu_online(cpu)) {
+ f->online_cpus++;
+ state->online = true;
+ }
+
+ list_add_tail(&state->sib, &f->lru);
+ INIT_LIST_HEAD(&state->pending_sib);
+ }
+
+ f->hotplug_thread = kthread_run(try_hotplug, (void *) f,
+ "core_ctl/%d", first_cpu);
+ if (IS_ERR(f->hotplug_thread))
+ return PTR_ERR(f->hotplug_thread);
+ sched_setscheduler_nocheck(f->hotplug_thread, SCHED_FIFO, &param);
+
+ for_each_cpu(cpu, mask) {
+ state = &per_cpu(cpu_state, cpu);
+ state->inited = true;
+ }
+
+ kobject_init(&f->kobj, &ktype_core_ctl);
+ return kobject_add(&f->kobj, &dev->kobj, "core_ctl");
+}
+
+static int cpufreq_policy_cb(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_policy *policy = data;
+
+ switch (val) {
+ case CPUFREQ_CREATE_POLICY:
+ group_init(policy->related_cpus);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block cpufreq_pol_nb = {
+ .notifier_call = cpufreq_policy_cb,
+};
+
+static int cpufreq_gov_cb(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_govinfo *info = data;
+
+ switch (val) {
+ case CPUFREQ_LOAD_CHANGE:
+ core_ctl_set_busy(info->cpu, info->load);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block cpufreq_gov_nb = {
+ .notifier_call = cpufreq_gov_cb,
+};
+
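+/*
+ * Groups are created per cpufreq policy (policy->related_cpus defines a
+ * cluster), either when a policy is created later or, for CPUs already
+ * online at this point, via the loop below. Per-CPU load updates then
+ * arrive through the CPUFREQ_GOVINFO_NOTIFIER (CPUFREQ_LOAD_CHANGE)
+ * callback.
+ */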
+static int __init core_ctl_init(void)
+{
+ struct cpufreq_policy *policy;
+ unsigned int cpu;
+
+ register_cpu_notifier(&cpu_notifier);
+ cpufreq_register_notifier(&cpufreq_pol_nb, CPUFREQ_POLICY_NOTIFIER);
+ cpufreq_register_notifier(&cpufreq_gov_nb, CPUFREQ_GOVINFO_NOTIFIER);
+ init_timer_deferrable(&rq_avg_timer);
+ rq_avg_timer.function = rq_avg_timer_func;
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ policy = cpufreq_cpu_get(cpu);
+ if (policy) {
+ group_init(policy->related_cpus);
+ cpufreq_cpu_put(policy);
+ }
+ }
+ put_online_cpus();
+ mod_timer(&rq_avg_timer, round_to_nw_start());
+ return 0;
+}
+
+late_initcall(core_ctl_init);