1dea4eb
From c517e903c4dbc9271b3cfb43b27d303dd6f03cd7 Mon Sep 17 00:00:00 2001
1dea4eb
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
1dea4eb
Date: Fri, 18 Mar 2016 15:36:25 +0100
1dea4eb
Subject: [PATCH] intel_pstate: Do not call wrmsrl_on_cpu() with disabled
1dea4eb
 interrupts
1dea4eb
1dea4eb
After commit a4675fbc4a7a (cpufreq: intel_pstate: Replace timers with
1dea4eb
utilization update callbacks) wrmsrl_on_cpu() cannot be called in the
1dea4eb
intel_pstate_adjust_busy_pstate() path as that is executed with
1dea4eb
disabled interrupts.  However, atom_set_pstate() called from there
1dea4eb
via intel_pstate_set_pstate() uses wrmsrl_on_cpu() to update the
1dea4eb
IA32_PERF_CTL MSR which triggers the WARN_ON_ONCE() in
1dea4eb
smp_call_function_single().
1dea4eb
1dea4eb
The reason why wrmsrl_on_cpu() is used by atom_set_pstate() is
1dea4eb
because intel_pstate_set_pstate() calling it is also invoked during
1dea4eb
the initialization and cleanup of the driver and in those cases it is
1dea4eb
not guaranteed to be run on the CPU that is being updated.  However,
1dea4eb
in the case when intel_pstate_set_pstate() is called by
1dea4eb
intel_pstate_adjust_busy_pstate(), wrmsrl() can be used to update
1dea4eb
the register safely.  Moreover, intel_pstate_set_pstate() already
1dea4eb
contains code that only is executed if the function is called by
1dea4eb
intel_pstate_adjust_busy_pstate() and there is a special argument
1dea4eb
passed to it because of that.
1dea4eb
1dea4eb
To fix the problem at hand, rearrange the code taking the above
1dea4eb
observations into account.
1dea4eb
1dea4eb
First, replace the ->set() callback in struct pstate_funcs with a
1dea4eb
->get_val() one that will return the value to be written to the
1dea4eb
IA32_PERF_CTL MSR without updating the register.
1dea4eb
1dea4eb
Second, split intel_pstate_set_pstate() into two functions,
1dea4eb
intel_pstate_update_pstate() to be called by
1dea4eb
intel_pstate_adjust_busy_pstate() that will contain all of the
1dea4eb
intel_pstate_set_pstate() code which only needs to be executed in
1dea4eb
that case and will use wrmsrl() to update the MSR (after obtaining
1dea4eb
the value to write to it from the ->get_val() callback), and
1dea4eb
intel_pstate_set_min_pstate() to be invoked during the
1dea4eb
initialization and cleanup that will set the P-state to the
1dea4eb
minimum one and will update the MSR using wrmsrl_on_cpu().
1dea4eb
1dea4eb
Finally, move the code shared between intel_pstate_update_pstate()
1dea4eb
and intel_pstate_set_min_pstate() to a new static inline function
1dea4eb
intel_pstate_record_pstate() and make them both call it.
1dea4eb
1dea4eb
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
1dea4eb
Fixes: a4675fbc4a7a (cpufreq: intel_pstate: Replace timers with utilization update callbacks)
1dea4eb
---
1dea4eb
 drivers/cpufreq/intel_pstate.c | 73 +++++++++++++++++++++++++-----------------
1dea4eb
 1 file changed, 43 insertions(+), 30 deletions(-)
1dea4eb
1dea4eb
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
1dea4eb
index cb5607495816..4b644526fd59 100644
1dea4eb
--- a/drivers/cpufreq/intel_pstate.c
1dea4eb
+++ b/drivers/cpufreq/intel_pstate.c
1dea4eb
@@ -134,7 +134,7 @@ struct pstate_funcs {
1dea4eb
 	int (*get_min)(void);
1dea4eb
 	int (*get_turbo)(void);
1dea4eb
 	int (*get_scaling)(void);
1dea4eb
-	void (*set)(struct cpudata*, int pstate);
1dea4eb
+	u64 (*get_val)(struct cpudata*, int pstate);
1dea4eb
 	void (*get_vid)(struct cpudata *);
1dea4eb
 	int32_t (*get_target_pstate)(struct cpudata *);
1dea4eb
 };
1dea4eb
@@ -565,7 +565,7 @@ static int atom_get_turbo_pstate(void)
1dea4eb
 	return value & 0x7F;
1dea4eb
 }
1dea4eb
 
1dea4eb
-static void atom_set_pstate(struct cpudata *cpudata, int pstate)
1dea4eb
+static u64 atom_get_val(struct cpudata *cpudata, int pstate)
1dea4eb
 {
1dea4eb
 	u64 val;
1dea4eb
 	int32_t vid_fp;
1dea4eb
@@ -585,9 +585,7 @@ static void atom_set_pstate(struct cpudata *cpudata, int pstate)
1dea4eb
 	if (pstate > cpudata->pstate.max_pstate)
1dea4eb
 		vid = cpudata->vid.turbo;
1dea4eb
 
1dea4eb
-	val |= vid;
1dea4eb
-
1dea4eb
-	wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
1dea4eb
+	return val | vid;
1dea4eb
 }
1dea4eb
 
1dea4eb
 static int silvermont_get_scaling(void)
1dea4eb
@@ -711,7 +709,7 @@ static inline int core_get_scaling(void)
1dea4eb
 	return 100000;
1dea4eb
 }
1dea4eb
 
1dea4eb
-static void core_set_pstate(struct cpudata *cpudata, int pstate)
1dea4eb
+static u64 core_get_val(struct cpudata *cpudata, int pstate)
1dea4eb
 {
1dea4eb
 	u64 val;
1dea4eb
 
1dea4eb
@@ -719,7 +717,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate)
1dea4eb
 	if (limits->no_turbo && !limits->turbo_disabled)
1dea4eb
 		val |= (u64)1 << 32;
1dea4eb
 
1dea4eb
-	wrmsrl(MSR_IA32_PERF_CTL, val);
1dea4eb
+	return val;
1dea4eb
 }
1dea4eb
 
1dea4eb
 static int knl_get_turbo_pstate(void)
1dea4eb
@@ -750,7 +748,7 @@ static struct cpu_defaults core_params = {
1dea4eb
 		.get_min = core_get_min_pstate,
1dea4eb
 		.get_turbo = core_get_turbo_pstate,
1dea4eb
 		.get_scaling = core_get_scaling,
1dea4eb
-		.set = core_set_pstate,
1dea4eb
+		.get_val = core_get_val,
1dea4eb
 		.get_target_pstate = get_target_pstate_use_performance,
1dea4eb
 	},
1dea4eb
 };
1dea4eb
@@ -769,7 +767,7 @@ static struct cpu_defaults silvermont_params = {
1dea4eb
 		.get_max_physical = atom_get_max_pstate,
1dea4eb
 		.get_min = atom_get_min_pstate,
1dea4eb
 		.get_turbo = atom_get_turbo_pstate,
1dea4eb
-		.set = atom_set_pstate,
1dea4eb
+		.get_val = atom_get_val,
1dea4eb
 		.get_scaling = silvermont_get_scaling,
1dea4eb
 		.get_vid = atom_get_vid,
1dea4eb
 		.get_target_pstate = get_target_pstate_use_cpu_load,
1dea4eb
@@ -790,7 +788,7 @@ static struct cpu_defaults airmont_params = {
1dea4eb
 		.get_max_physical = atom_get_max_pstate,
1dea4eb
 		.get_min = atom_get_min_pstate,
1dea4eb
 		.get_turbo = atom_get_turbo_pstate,
1dea4eb
-		.set = atom_set_pstate,
1dea4eb
+		.get_val = atom_get_val,
1dea4eb
 		.get_scaling = airmont_get_scaling,
1dea4eb
 		.get_vid = atom_get_vid,
1dea4eb
 		.get_target_pstate = get_target_pstate_use_cpu_load,
1dea4eb
@@ -812,7 +810,7 @@ static struct cpu_defaults knl_params = {
1dea4eb
 		.get_min = core_get_min_pstate,
1dea4eb
 		.get_turbo = knl_get_turbo_pstate,
1dea4eb
 		.get_scaling = core_get_scaling,
1dea4eb
-		.set = core_set_pstate,
1dea4eb
+		.get_val = core_get_val,
1dea4eb
 		.get_target_pstate = get_target_pstate_use_performance,
1dea4eb
 	},
1dea4eb
 };
1dea4eb
@@ -839,25 +837,24 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
1dea4eb
 	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
1dea4eb
 }
1dea4eb
 
1dea4eb
-static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
1dea4eb
+static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate)
1dea4eb
 {
1dea4eb
-	int max_perf, min_perf;
1dea4eb
-
1dea4eb
-	if (force) {
1dea4eb
-		update_turbo_state();
1dea4eb
-
1dea4eb
-		intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
1dea4eb
-
1dea4eb
-		pstate = clamp_t(int, pstate, min_perf, max_perf);
1dea4eb
-
1dea4eb
-		if (pstate == cpu->pstate.current_pstate)
1dea4eb
-			return;
1dea4eb
-	}
1dea4eb
 	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
1dea4eb
-
1dea4eb
 	cpu->pstate.current_pstate = pstate;
1dea4eb
+}
1dea4eb
 
1dea4eb
-	pstate_funcs.set(cpu, pstate);
1dea4eb
+static void intel_pstate_set_min_pstate(struct cpudata *cpu)
1dea4eb
+{
1dea4eb
+	int pstate = cpu->pstate.min_pstate;
1dea4eb
+
1dea4eb
+	intel_pstate_record_pstate(cpu, pstate);
1dea4eb
+	/*
1dea4eb
+	 * Generally, there is no guarantee that this code will always run on
1dea4eb
+	 * the CPU being updated, so force the register update to run on the
1dea4eb
+	 * right CPU.
1dea4eb
+	 */
1dea4eb
+	wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
1dea4eb
+		      pstate_funcs.get_val(cpu, pstate));
1dea4eb
 }
1dea4eb
 
1dea4eb
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
1dea4eb
@@ -870,7 +867,8 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
1dea4eb
 
1dea4eb
 	if (pstate_funcs.get_vid)
1dea4eb
 		pstate_funcs.get_vid(cpu);
1dea4eb
-	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
1dea4eb
+
1dea4eb
+	intel_pstate_set_min_pstate(cpu);
1dea4eb
 }
1dea4eb
 
1dea4eb
 static inline void intel_pstate_calc_busy(struct cpudata *cpu)
1dea4eb
@@ -997,6 +995,21 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
1dea4eb
 	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
1dea4eb
 }
1dea4eb
 
1dea4eb
+static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
1dea4eb
+{
1dea4eb
+	int max_perf, min_perf;
1dea4eb
+
1dea4eb
+	update_turbo_state();
1dea4eb
+
1dea4eb
+	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
1dea4eb
+	pstate = clamp_t(int, pstate, min_perf, max_perf);
1dea4eb
+	if (pstate == cpu->pstate.current_pstate)
1dea4eb
+		return;
1dea4eb
+
1dea4eb
+	intel_pstate_record_pstate(cpu, pstate);
1dea4eb
+	wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
1dea4eb
+}
1dea4eb
+
1dea4eb
 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
1dea4eb
 {
1dea4eb
 	int from, target_pstate;
1dea4eb
@@ -1006,7 +1019,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
1dea4eb
 
1dea4eb
 	target_pstate = pstate_funcs.get_target_pstate(cpu);
1dea4eb
 
1dea4eb
-	intel_pstate_set_pstate(cpu, target_pstate, true);
1dea4eb
+	intel_pstate_update_pstate(cpu, target_pstate);
1dea4eb
 
1dea4eb
 	sample = &cpu->sample;
1dea4eb
 	trace_pstate_sample(fp_toint(sample->core_pct_busy),
1dea4eb
@@ -1180,7 +1193,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
1dea4eb
 	if (hwp_active)
1dea4eb
 		return;
1dea4eb
 
1dea4eb
-	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
1dea4eb
+	intel_pstate_set_min_pstate(cpu);
1dea4eb
 }
1dea4eb
 
1dea4eb
 static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
1dea4eb
@@ -1255,7 +1268,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
1dea4eb
 	pstate_funcs.get_min   = funcs->get_min;
1dea4eb
 	pstate_funcs.get_turbo = funcs->get_turbo;
1dea4eb
 	pstate_funcs.get_scaling = funcs->get_scaling;
1dea4eb
-	pstate_funcs.set       = funcs->set;
1dea4eb
+	pstate_funcs.get_val   = funcs->get_val;
1dea4eb
 	pstate_funcs.get_vid   = funcs->get_vid;
1dea4eb
 	pstate_funcs.get_target_pstate = funcs->get_target_pstate;
1dea4eb
 
1dea4eb
-- 
1dea4eb
2.5.0
1dea4eb