aboutsummaryrefslogtreecommitdiff
path: root/drivers/soc/qcom/scm.c
diff options
context:
space:
mode:
authorSultan Alsawaf <sultan@kerneltoast.com>2022-06-01 23:21:08 -0700
committerKitsunejasutin <justineryandeguia@yahoo.com>2022-07-17 20:08:08 +0800
commitd30789b448597d221c5c6163aa63f5c9cd61214f (patch)
tree4304dfec98f7d5a595d15c5867240199832a7532 /drivers/soc/qcom/scm.c
parent2384c4764ac9fb6357c1f859269f200e6dcd15b1 (diff)
memlat: Read perf counters in parallel and reduce system jitterHEADs12.1
Sending synchronous IPIs to other CPUs involves spinning with preemption disabled in order to wait for each IPI to finish. Keeping preemption off for long periods of time like this is bad for system jitter, not to mention the perf event IPIs are sent and flushed one at a time for each event for each CPU rather than all at once for all the CPUs. Since the way perf events are currently read is quite naive, rewrite it to make it exploit parallelism and go much faster. IPIs for reading each perf event are now sent to all CPUs asynchronously so that each CPU can work on reading the events in parallel, and the dispatching CPU now sleeps rather than spins when waiting for the IPIs to finish. Before the dispatching CPU starts waiting though, it works on reading events for itself and then reading events which can be read from any CPU in order to derive further parallelism, and then waits for the IPIs to finish afterwards if they haven't already. Furthermore, there's now only one IPI sent to read all of a CPU's events rather than an IPI sent for reading each event, which significantly speeds up the event reads and reduces the number of IPIs sent. This also checks for active SCM calls on a per-CPU basis rather than a global basis so that unrelated CPUs don't get their counter reads skipped and so that some CPUs can still receive fresh counter readings. Overall, this makes the memlat driver much faster and more efficient, and eliminates significant system jitter previously caused by IPI abuse. Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com> Signed-off-by: Adithya R <gh0strider.2k18.reborn@gmail.com>
Diffstat (limited to 'drivers/soc/qcom/scm.c')
-rw-r--r--drivers/soc/qcom/scm.c21
1 file changed, 12 insertions, 9 deletions
diff --git a/drivers/soc/qcom/scm.c b/drivers/soc/qcom/scm.c
index b067c9baf4e2..fc36e8db1ab5 100644
--- a/drivers/soc/qcom/scm.c
+++ b/drivers/soc/qcom/scm.c
@@ -36,7 +36,7 @@
#define SCM_EBUSY -55
#define SCM_V2_EBUSY -12
-static atomic_t scm_call_count = ATOMIC_INIT(0);
+static DEFINE_PER_CPU(atomic_t, scm_call_count);
static DEFINE_MUTEX(scm_lock);
/*
@@ -433,11 +433,12 @@ static int ___scm_call_armv8_64(u64 x0, u64 x1, u64 x2, u64 x3, u64 x4, u64 x5,
static int __scm_call_armv8_64(u64 x0, u64 x1, u64 x2, u64 x3, u64 x4, u64 x5,
u64 *ret1, u64 *ret2, u64 *ret3)
{
+ atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
int ret;
- atomic_inc(&scm_call_count);
+ atomic_inc(cnt);
ret = ___scm_call_armv8_64(x0, x1, x2, x3, x4, x5, ret1, ret2, ret3);
- atomic_dec(&scm_call_count);
+ atomic_dec(cnt);
return ret;
}
@@ -495,11 +496,12 @@ static int ___scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
static int __scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
u64 *ret1, u64 *ret2, u64 *ret3)
{
+ atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
int ret;
- atomic_inc(&scm_call_count);
+ atomic_inc(cnt);
ret = ___scm_call_armv8_32(w0, w1, w2, w3, w4, w5, ret1, ret2, ret3);
- atomic_dec(&scm_call_count);
+ atomic_dec(cnt);
return ret;
}
@@ -557,11 +559,12 @@ static int ___scm_call_armv7_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
static int __scm_call_armv7_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
u64 *ret1, u64 *ret2, u64 *ret3)
{
+ atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
int ret;
- atomic_inc(&scm_call_count);
+ atomic_inc(cnt);
ret = ___scm_call_armv7_32(w0, w1, w2, w3, w4, w5, ret1, ret2, ret3);
- atomic_dec(&scm_call_count);
+ atomic_dec(cnt);
return ret;
}
@@ -1352,7 +1355,7 @@ inline int scm_enable_mem_protection(void)
#endif
EXPORT_SYMBOL(scm_enable_mem_protection);
-bool under_scm_call(void)
+bool under_scm_call(int cpu)
{
- return atomic_read(&scm_call_count);
+ return atomic_read(per_cpu_ptr(&scm_call_count, cpu));
}