Commit 5a01f2e8 authored by Venkatesh Pallipadi's avatar Venkatesh Pallipadi Committed by Dave Jones

[CPUFREQ] Rewrite lock in cpufreq to eliminate cpufreq/hotplug related issues

Yet another attempt to resolve cpufreq and hotplug locking issues.

Patchset has 3 patches:
* Rewrite the lock infrastructure of cpufreq using a per cpu rwsem.
* Minor restructuring of work callback in ondemand driver.
* Use the new cpufreq rwsem infrastructure in ondemand work.

This patch:

Convert policy->lock to rwsem and move it to per_cpu area.
This rwsem will protect against both changing/accessing policy
related parameters and CPU hot plug/unplug.

[malattia@linux.it: fix oops in kref_put()]
Cc: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: default avatarVenkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: default avatarMattia Dongili <malattia@linux.it>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarDave Jones <davej@redhat.com>
parent c1200697
......@@ -41,8 +41,67 @@ static struct cpufreq_driver *cpufreq_driver;
static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS];
static DEFINE_SPINLOCK(cpufreq_driver_lock);
/*
* cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
* all cpufreq/hotplug/workqueue/etc related lock issues.
*
* The rules for this semaphore:
* - Any routine that wants to read from the policy structure will
* do a down_read on this semaphore.
* - Any routine that will write to the policy structure and/or may take away
* the policy altogether (eg. CPU hotplug), will hold this lock in write
* mode before doing so.
*
* Additional rules:
* - All holders of the lock should check to make sure that the CPU they
* are concerned with are online after they get the lock.
* - Governor routines that can be called in cpufreq hotplug path should not
* take this sem as top level hotplug notifier handler takes this.
*/
static DEFINE_PER_CPU(int, policy_cpu);
static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
#define lock_policy_rwsem(mode, cpu) \
int lock_policy_rwsem_##mode \
(int cpu) \
{ \
int policy_cpu = per_cpu(policy_cpu, cpu); \
BUG_ON(policy_cpu == -1); \
down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \
if (unlikely(!cpu_online(cpu))) { \
up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \
return -1; \
} \
\
return 0; \
}
lock_policy_rwsem(read, cpu);
EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);
lock_policy_rwsem(write, cpu);
EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
void unlock_policy_rwsem_read(int cpu)
{
int policy_cpu = per_cpu(policy_cpu, cpu);
BUG_ON(policy_cpu == -1);
up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
}
EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);
void unlock_policy_rwsem_write(int cpu)
{
int policy_cpu = per_cpu(policy_cpu, cpu);
BUG_ON(policy_cpu == -1);
up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
}
EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);
/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
static unsigned int __cpufreq_get(unsigned int cpu);
static void handle_update(struct work_struct *work);
/**
......@@ -415,10 +474,8 @@ static ssize_t store_##file_name \
if (ret != 1) \
return -EINVAL; \
\
mutex_lock(&policy->lock); \
ret = __cpufreq_set_policy(policy, &new_policy); \
policy->user_policy.object = policy->object; \
mutex_unlock(&policy->lock); \
\
return ret ? ret : count; \
}
......@@ -432,7 +489,7 @@ store_one(scaling_max_freq,max);
static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy,
char *buf)
{
unsigned int cur_freq = cpufreq_get(policy->cpu);
unsigned int cur_freq = __cpufreq_get(policy->cpu);
if (!cur_freq)
return sprintf(buf, "<unknown>");
return sprintf(buf, "%u\n", cur_freq);
......@@ -479,12 +536,10 @@ static ssize_t store_scaling_governor (struct cpufreq_policy * policy,
/* Do not use cpufreq_set_policy here or the user_policy.max
will be wrongly overridden */
mutex_lock(&policy->lock);
ret = __cpufreq_set_policy(policy, &new_policy);
policy->user_policy.policy = policy->policy;
policy->user_policy.governor = policy->governor;
mutex_unlock(&policy->lock);
if (ret)
return ret;
......@@ -589,11 +644,17 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr ,char * buf)
policy = cpufreq_cpu_get(policy->cpu);
if (!policy)
return -EINVAL;
if (lock_policy_rwsem_read(policy->cpu) < 0)
return -EINVAL;
if (fattr->show)
ret = fattr->show(policy, buf);
else
ret = -EIO;
unlock_policy_rwsem_read(policy->cpu);
cpufreq_cpu_put(policy);
return ret;
}
......@@ -607,11 +668,17 @@ static ssize_t store(struct kobject * kobj, struct attribute * attr,
policy = cpufreq_cpu_get(policy->cpu);
if (!policy)
return -EINVAL;
if (lock_policy_rwsem_write(policy->cpu) < 0)
return -EINVAL;
if (fattr->store)
ret = fattr->store(policy, buf, count);
else
ret = -EIO;
unlock_policy_rwsem_write(policy->cpu);
cpufreq_cpu_put(policy);
return ret;
}
......@@ -685,8 +752,10 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
policy->cpu = cpu;
policy->cpus = cpumask_of_cpu(cpu);
mutex_init(&policy->lock);
mutex_lock(&policy->lock);
/* Initially set CPU itself as the policy_cpu */
per_cpu(policy_cpu, cpu) = cpu;
lock_policy_rwsem_write(cpu);
init_completion(&policy->kobj_unregister);
INIT_WORK(&policy->update, handle_update);
......@@ -696,7 +765,7 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
ret = cpufreq_driver->init(policy);
if (ret) {
dprintk("initialization failed\n");
mutex_unlock(&policy->lock);
unlock_policy_rwsem_write(cpu);
goto err_out;
}
......@@ -710,6 +779,14 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
*/
managed_policy = cpufreq_cpu_get(j);
if (unlikely(managed_policy)) {
/* Set proper policy_cpu */
unlock_policy_rwsem_write(cpu);
per_cpu(policy_cpu, cpu) = managed_policy->cpu;
if (lock_policy_rwsem_write(cpu) < 0)
goto err_out_driver_exit;
spin_lock_irqsave(&cpufreq_driver_lock, flags);
managed_policy->cpus = policy->cpus;
cpufreq_cpu_data[cpu] = managed_policy;
......@@ -720,13 +797,13 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
&managed_policy->kobj,
"cpufreq");
if (ret) {
mutex_unlock(&policy->lock);
unlock_policy_rwsem_write(cpu);
goto err_out_driver_exit;
}
cpufreq_debug_enable_ratelimit();
mutex_unlock(&policy->lock);
ret = 0;
unlock_policy_rwsem_write(cpu);
goto err_out_driver_exit; /* call driver->exit() */
}
}
......@@ -740,7 +817,7 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
ret = kobject_register(&policy->kobj);
if (ret) {
mutex_unlock(&policy->lock);
unlock_policy_rwsem_write(cpu);
goto err_out_driver_exit;
}
/* set up files for this cpu device */
......@@ -755,8 +832,10 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
spin_lock_irqsave(&cpufreq_driver_lock, flags);
for_each_cpu_mask(j, policy->cpus)
for_each_cpu_mask(j, policy->cpus) {
cpufreq_cpu_data[j] = policy;
per_cpu(policy_cpu, j) = policy->cpu;
}
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
/* symlink affected CPUs */
......@@ -772,14 +851,14 @@ static int cpufreq_add_dev (struct sys_device * sys_dev)
ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
"cpufreq");
if (ret) {
mutex_unlock(&policy->lock);
unlock_policy_rwsem_write(cpu);
goto err_out_unregister;
}
}
policy->governor = NULL; /* to assure that the starting sequence is
* run in cpufreq_set_policy */
mutex_unlock(&policy->lock);
unlock_policy_rwsem_write(cpu);
/* set default policy */
ret = cpufreq_set_policy(&new_policy);
......@@ -820,11 +899,13 @@ module_out:
/**
* cpufreq_remove_dev - remove a CPU device
* __cpufreq_remove_dev - remove a CPU device
*
* Removes the cpufreq interface for a CPU device.
* Caller should already have policy_rwsem in write mode for this CPU.
* This routine frees the rwsem before returning.
*/
static int cpufreq_remove_dev (struct sys_device * sys_dev)
static int __cpufreq_remove_dev (struct sys_device * sys_dev)
{
unsigned int cpu = sys_dev->id;
unsigned long flags;
......@@ -843,6 +924,7 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
if (!data) {
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
cpufreq_debug_enable_ratelimit();
unlock_policy_rwsem_write(cpu);
return -EINVAL;
}
cpufreq_cpu_data[cpu] = NULL;
......@@ -859,6 +941,7 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
sysfs_remove_link(&sys_dev->kobj, "cpufreq");
cpufreq_cpu_put(data);
cpufreq_debug_enable_ratelimit();
unlock_policy_rwsem_write(cpu);
return 0;
}
#endif
......@@ -867,6 +950,7 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
if (!kobject_get(&data->kobj)) {
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
cpufreq_debug_enable_ratelimit();
unlock_policy_rwsem_write(cpu);
return -EFAULT;
}
......@@ -900,10 +984,10 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
#endif
mutex_lock(&data->lock);
if (cpufreq_driver->target)
__cpufreq_governor(data, CPUFREQ_GOV_STOP);
mutex_unlock(&data->lock);
unlock_policy_rwsem_write(cpu);
kobject_unregister(&data->kobj);
......@@ -927,6 +1011,18 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev)
}
static int cpufreq_remove_dev (struct sys_device * sys_dev)
{
unsigned int cpu = sys_dev->id;
int retval;
if (unlikely(lock_policy_rwsem_write(cpu)))
BUG();
retval = __cpufreq_remove_dev(sys_dev);
return retval;
}
static void handle_update(struct work_struct *work)
{
struct cpufreq_policy *policy =
......@@ -974,9 +1070,12 @@ unsigned int cpufreq_quick_get(unsigned int cpu)
unsigned int ret_freq = 0;
if (policy) {
mutex_lock(&policy->lock);
if (unlikely(lock_policy_rwsem_read(cpu)))
return ret_freq;
ret_freq = policy->cur;
mutex_unlock(&policy->lock);
unlock_policy_rwsem_read(cpu);
cpufreq_cpu_put(policy);
}
......@@ -985,24 +1084,13 @@ unsigned int cpufreq_quick_get(unsigned int cpu)
EXPORT_SYMBOL(cpufreq_quick_get);
/**
* cpufreq_get - get the current CPU frequency (in kHz)
* @cpu: CPU number
*
* Get the CPU current (static) CPU frequency
*/
unsigned int cpufreq_get(unsigned int cpu)
static unsigned int __cpufreq_get(unsigned int cpu)
{
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
struct cpufreq_policy *policy = cpufreq_cpu_data[cpu];
unsigned int ret_freq = 0;
if (!policy)
return 0;
if (!cpufreq_driver->get)
goto out;
mutex_lock(&policy->lock);
return (ret_freq);
ret_freq = cpufreq_driver->get(cpu);
......@@ -1016,11 +1104,33 @@ unsigned int cpufreq_get(unsigned int cpu)
}
}
mutex_unlock(&policy->lock);
return (ret_freq);
}
out:
cpufreq_cpu_put(policy);
/**
* cpufreq_get - get the current CPU frequency (in kHz)
* @cpu: CPU number
*
* Get the CPU current (static) CPU frequency
*/
unsigned int cpufreq_get(unsigned int cpu)
{
unsigned int ret_freq = 0;
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
if (!policy)
goto out;
if (unlikely(lock_policy_rwsem_read(cpu)))
goto out_policy;
ret_freq = __cpufreq_get(cpu);
unlock_policy_rwsem_read(cpu);
out_policy:
cpufreq_cpu_put(policy);
out:
return (ret_freq);
}
EXPORT_SYMBOL(cpufreq_get);
......@@ -1297,18 +1407,19 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
if (!policy)
return -EINVAL;
mutex_lock(&policy->lock);
if (unlikely(lock_policy_rwsem_write(policy->cpu)))
return -EINVAL;
ret = __cpufreq_driver_target(policy, target_freq, relation);
mutex_unlock(&policy->lock);
unlock_policy_rwsem_write(policy->cpu);
cpufreq_cpu_put(policy);
return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);
int cpufreq_driver_getavg(struct cpufreq_policy *policy)
int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
{
int ret = 0;
......@@ -1316,17 +1427,13 @@ int cpufreq_driver_getavg(struct cpufreq_policy *policy)
if (!policy)
return -EINVAL;
mutex_lock(&policy->lock);
if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
ret = cpufreq_driver->getavg(policy->cpu);
mutex_unlock(&policy->lock);
cpufreq_cpu_put(policy);
return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_getavg);
EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
/*
* when "event" is CPUFREQ_GOV_LIMITS
......@@ -1410,9 +1517,7 @@ int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
if (!cpu_policy)
return -EINVAL;
mutex_lock(&cpu_policy->lock);
memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
mutex_unlock(&cpu_policy->lock);
cpufreq_cpu_put(cpu_policy);
return 0;
......@@ -1528,8 +1633,9 @@ int cpufreq_set_policy(struct cpufreq_policy *policy)
if (!data)
return -EINVAL;
/* lock this CPU */
mutex_lock(&data->lock);
if (unlikely(lock_policy_rwsem_write(policy->cpu)))
return -EINVAL;
ret = __cpufreq_set_policy(data, policy);
data->user_policy.min = data->min;
......@@ -1537,7 +1643,7 @@ int cpufreq_set_policy(struct cpufreq_policy *policy)
data->user_policy.policy = data->policy;
data->user_policy.governor = data->governor;
mutex_unlock(&data->lock);
unlock_policy_rwsem_write(policy->cpu);
cpufreq_cpu_put(data);
......@@ -1562,7 +1668,8 @@ int cpufreq_update_policy(unsigned int cpu)
if (!data)
return -ENODEV;
mutex_lock(&data->lock);
if (unlikely(lock_policy_rwsem_write(cpu)))
return -EINVAL;
dprintk("updating policy for CPU %u\n", cpu);
memcpy(&policy, data, sizeof(struct cpufreq_policy));
......@@ -1587,7 +1694,8 @@ int cpufreq_update_policy(unsigned int cpu)
ret = __cpufreq_set_policy(data, &policy);
mutex_unlock(&data->lock);
unlock_policy_rwsem_write(cpu);
cpufreq_cpu_put(data);
return ret;
}
......@@ -1597,31 +1705,28 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
struct cpufreq_policy *policy;
struct sys_device *sys_dev;
struct cpufreq_policy *policy;
sys_dev = get_cpu_sysdev(cpu);
if (sys_dev) {
switch (action) {
case CPU_ONLINE:
cpufreq_add_dev(sys_dev);
break;
case CPU_DOWN_PREPARE:
/*
* We attempt to put this cpu in lowest frequency
* possible before going down. This will permit
* hardware-managed P-State to switch other related
* threads to min or higher speeds if possible.
*/
if (unlikely(lock_policy_rwsem_write(cpu)))
BUG();
policy = cpufreq_cpu_data[cpu];
if (policy) {
cpufreq_driver_target(policy, policy->min,
__cpufreq_driver_target(policy, policy->min,
CPUFREQ_RELATION_H);
}
__cpufreq_remove_dev(sys_dev);
break;
case CPU_DEAD:
cpufreq_remove_dev(sys_dev);
case CPU_DOWN_FAILED:
cpufreq_add_dev(sys_dev);
break;
}
}
......@@ -1735,3 +1840,16 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
static int __init cpufreq_core_init(void)
{
int cpu;
for_each_possible_cpu(cpu) {
per_cpu(policy_cpu, cpu) = -1;
init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
}
return 0;
}
core_initcall(cpufreq_core_init);
......@@ -84,9 +84,6 @@ struct cpufreq_policy {
unsigned int policy; /* see above */
struct cpufreq_governor *governor; /* see below */
struct mutex lock; /* CPU ->setpolicy or ->target may
only be called once a time */
struct work_struct update; /* if update_policy() needs to be
* called, but you're in IRQ context */
......@@ -172,11 +169,16 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy,
unsigned int relation);
extern int cpufreq_driver_getavg(struct cpufreq_policy *policy);
extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy);
int cpufreq_register_governor(struct cpufreq_governor *governor);
void cpufreq_unregister_governor(struct cpufreq_governor *governor);
int lock_policy_rwsem_read(int cpu);
int lock_policy_rwsem_write(int cpu);
void unlock_policy_rwsem_read(int cpu);
void unlock_policy_rwsem_write(int cpu);
/*********************************************************************
* CPUFREQ DRIVER INTERFACE *
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment