| author | Sultan Alsawaf <sultan@kerneltoast.com> | 2022-06-01 23:21:08 -0700 |
|---|---|---|
| committer | Kitsunejasutin <justineryandeguia@yahoo.com> | 2022-07-17 20:08:08 +0800 |
| commit | d30789b448597d221c5c6163aa63f5c9cd61214f (patch) | |
| tree | 4304dfec98f7d5a595d15c5867240199832a7532 /drivers/soc/qcom/scm.c | |
| parent | 2384c4764ac9fb6357c1f859269f200e6dcd15b1 (diff) | |
Sending synchronous IPIs to other CPUs involves spinning with preemption
disabled in order to wait for each IPI to finish. Keeping preemption off
for long periods like this is bad for system jitter, not to mention that
the perf event IPIs are sent and flushed one at a time, per event and per
CPU, rather than all at once for all CPUs.
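As a concrete illustration of the pattern being criticized, here is a minimal sketch (assumed for illustration, not the driver's exact code): each `perf_event_read_value()` call sends one synchronous IPI to the CPU the event is active on and busy-waits, with preemption disabled, until the handler completes, so reading E events on each of C CPUs serializes E × C round trips.

```c
#include <linux/perf_event.h>

/*
 * Naive per-event reads (illustrative sketch): every call to
 * perf_event_read_value() issues a single synchronous IPI and spins,
 * preemption off, until that one event's count has been flushed.
 */
static void read_all_events_naive(struct perf_event **events, int nr_events)
{
	u64 enabled, running;
	int i;

	for (i = 0; i < nr_events; i++)
		perf_event_read_value(events[i], &enabled, &running);
}
```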
Since the way perf events are currently read is quite naive, rewrite it to
make it exploit parallelism and go much faster. IPIs for reading each perf
event are now sent to all CPUs asynchronously so that each CPU can work on
reading the events in parallel, and the dispatching CPU now sleeps rather
than spins when waiting for the IPIs to finish. Before it starts waiting,
though, the dispatching CPU reads its own events and then the events that
can be read from any CPU, extracting further parallelism, and only
afterwards waits for any IPIs that haven't yet finished.
Furthermore, there's now only one IPI sent to read all of a CPU's events
rather than an IPI sent for reading each event, which significantly speeds
up the event reads and reduces the number of IPIs sent.
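The memlat-side rewrite is not part of this diff (the diffstat is limited to scm.c), but a minimal sketch of the described shape, assuming generic kernel primitives (`call_single_data_t`; `struct call_single_data` on older kernels) and hypothetical `read_events_*` helpers, looks like this:

```c
#include <linux/completion.h>
#include <linux/percpu.h>
#include <linux/smp.h>

/* Hypothetical helpers standing in for the driver's real read paths. */
static void read_events_for_this_cpu(void);
static void read_cpu_agnostic_events(void);

/* Hypothetical per-CPU state; illustrates the shape, not the driver. */
struct cpu_read_data {
	call_single_data_t csd;
	struct completion done;	/* init_completion() at probe, elided */
};
static DEFINE_PER_CPU(struct cpu_read_data, read_data);

/* One IPI handler reads *all* of this CPU's events in a single shot. */
static void read_all_local_events(void *info)
{
	struct cpu_read_data *d = info;

	read_events_for_this_cpu();
	complete(&d->done);
}

static void read_events_fast(void)
{
	int this_cpu = get_cpu(), cpu;

	/*
	 * Fan out asynchronous IPIs so every CPU reads its own events in
	 * parallel (CPU hotplug locking elided for brevity).
	 */
	for_each_online_cpu(cpu) {
		struct cpu_read_data *d = &per_cpu(read_data, cpu);

		if (cpu == this_cpu)
			continue;
		reinit_completion(&d->done);
		d->csd.func = read_all_local_events;
		d->csd.info = d;
		smp_call_function_single_async(cpu, &d->csd);
	}

	/* Work locally while the other CPUs are busy... */
	read_events_for_this_cpu();
	read_cpu_agnostic_events();
	put_cpu();

	/* ...then sleep, rather than spin, for any stragglers. */
	for_each_online_cpu(cpu)
		if (cpu != this_cpu)
			wait_for_completion(&per_cpu(read_data, cpu).done);
}
```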
This also checks for active SCM calls on a per-CPU basis rather than a
global basis, so that CPUs unrelated to an in-flight SCM call don't have
their counter reads skipped and still receive fresh counter readings.
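Building on the hypothetical handler sketched above, the consumer-side guard might look like the following; `under_scm_call()` comes from the scm.c change below, everything else is assumed:

```c
bool under_scm_call(int cpu);	/* from drivers/soc/qcom/scm.c (this patch) */

static void read_all_local_events(void *info)
{
	struct cpu_read_data *d = info;

	/* Skip only this CPU's read; other CPUs still get fresh counts. */
	if (!under_scm_call(raw_smp_processor_id()))
		read_events_for_this_cpu();	/* hypothetical helper */
	complete(&d->done);
}
```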
Overall, this makes the memlat driver much faster and more efficient, and
eliminates significant system jitter previously caused by IPI abuse.
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
Signed-off-by: Adithya R <gh0strider.2k18.reborn@gmail.com>
Diffstat (limited to 'drivers/soc/qcom/scm.c')
| -rw-r--r-- | drivers/soc/qcom/scm.c | 21 |
1 file changed, 12 insertions(+), 9 deletions(-)
```diff
diff --git a/drivers/soc/qcom/scm.c b/drivers/soc/qcom/scm.c
index b067c9baf4e2..fc36e8db1ab5 100644
--- a/drivers/soc/qcom/scm.c
+++ b/drivers/soc/qcom/scm.c
@@ -36,7 +36,7 @@
 #define SCM_EBUSY -55
 #define SCM_V2_EBUSY -12
 
-static atomic_t scm_call_count = ATOMIC_INIT(0);
+static DEFINE_PER_CPU(atomic_t, scm_call_count);
 static DEFINE_MUTEX(scm_lock);
 
 /*
@@ -433,11 +433,12 @@ static int ___scm_call_armv8_64(u64 x0, u64 x1, u64 x2, u64 x3, u64 x4, u64 x5,
 static int __scm_call_armv8_64(u64 x0, u64 x1, u64 x2, u64 x3, u64 x4, u64 x5,
 			       u64 *ret1, u64 *ret2, u64 *ret3)
 {
+	atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
 	int ret;
 
-	atomic_inc(&scm_call_count);
+	atomic_inc(cnt);
 	ret = ___scm_call_armv8_64(x0, x1, x2, x3, x4, x5, ret1, ret2, ret3);
-	atomic_dec(&scm_call_count);
+	atomic_dec(cnt);
 
 	return ret;
 }
@@ -495,11 +496,12 @@ static int ___scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
 static int __scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
 			       u64 *ret1, u64 *ret2, u64 *ret3)
 {
+	atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
 	int ret;
 
-	atomic_inc(&scm_call_count);
+	atomic_inc(cnt);
 	ret = ___scm_call_armv8_32(w0, w1, w2, w3, w4, w5, ret1, ret2, ret3);
-	atomic_dec(&scm_call_count);
+	atomic_dec(cnt);
 
 	return ret;
 }
@@ -557,11 +559,12 @@ static int __scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
 static int __scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
 			       u64 *ret1, u64 *ret2, u64 *ret3)
 {
+	atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
 	int ret;
 
-	atomic_inc(&scm_call_count);
+	atomic_inc(cnt);
 	ret = ___scm_call_armv8_32(w0, w1, w2, w3, w4, w5, ret1, ret2, ret3);
-	atomic_dec(&scm_call_count);
+	atomic_dec(cnt);
 
 	return ret;
 }
@@ -1352,7 +1355,7 @@ inline int scm_enable_mem_protection(void)
 #endif
 EXPORT_SYMBOL(scm_enable_mem_protection);
 
-bool under_scm_call(void)
+bool under_scm_call(int cpu)
 {
-	return atomic_read(&scm_call_count);
+	return atomic_read(per_cpu_ptr(&scm_call_count, cpu));
 }
```
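A note on the bookkeeping above: the per-CPU pointer is captured once via raw_smp_processor_id(), so the atomic_inc()/atomic_dec() pair always balances on the same counter even if the task migrates mid-call; the call is then simply attributed to the CPU where it began, which appears to be an acceptable approximation for this use.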
