linux-kernel-cpufreq.c_2

深度解析CPU频率调节机制

最新推荐文章于 2025-04-14 16:42:09 发布

转载最新推荐文章于 2025-04-14 16:42:09 发布 · 246 阅读

kernel 专栏收录该内容

37 篇文章

订阅专栏

本文深入探讨了CPU频率调节的原理与实现机制，详细解释了如何通过内核API进行频率调整，包括设置频率范围、获取当前频率以及使用不同的频率策略。文章还介绍了通知机制、频率同步处理和频率策略切换等关键概念。

static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
{
int j;

policy->last_cpu = policy->cpu;
policy->cpu = cpu;

for_each_cpu(j, policy->cpus)
per_cpu(cpufreq_policy_cpu, j) = cpu;

#ifdef CONFIG_CPU_FREQ_TABLE
cpufreq_frequency_table_update_policy_cpu(policy);
#endif
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_UPDATE_POLICY_CPU, policy);
}

/**
* __cpufreq_remove_dev - remove a CPU device
*
* Removes the cpufreq interface for a CPU device.
* Caller should already have policy_rwsem in write mode for this CPU.
* This routine frees the rwsem before returning.
*/
static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
unsigned int cpu = dev->id, ret, cpus;
unsigned long flags;
struct cpufreq_policy *data;
struct kobject *kobj;
struct completion *cmp;
struct device *cpu_dev;

pr_debug("%s: unregistering CPU %u\n", __func__, cpu);

write_lock_irqsave(&cpufreq_driver_lock, flags);

data = per_cpu(cpufreq_cpu_data, cpu);
per_cpu(cpufreq_cpu_data, cpu) = NULL;

write_unlock_irqrestore(&cpufreq_driver_lock, flags);

if (!data) {
pr_debug("%s: No cpu_data found\n", __func__);
return -EINVAL;
}

if (cpufreq_driver->target)
__cpufreq_governor(data, CPUFREQ_GOV_STOP);

#ifdef CONFIG_HOTPLUG_CPU
if (!cpufreq_driver->setpolicy)
strncpy(per_cpu(cpufreq_cpu_governor, cpu),
data->governor->name, CPUFREQ_NAME_LEN);
#endif

WARN_ON(lock_policy_rwsem_write(cpu));
cpus = cpumask_weight(data->cpus);

if (cpus > 1)
cpumask_clear_cpu(cpu, data->cpus);
unlock_policy_rwsem_write(cpu);

if (cpu != data->cpu) {
  sysfs_remove_link(&dev->kobj, "cpufreq");
} else if (cpus > 1) {
  /* first sibling now owns the new sysfs dir */
  cpu_dev = get_cpu_device(cpumask_first(data->cpus));
  sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
  ret = kobject_move(&data->kobj, &cpu_dev->kobj);
  if (ret) {
   pr_err("%s: Failed to move kobj: %d", __func__, ret);

WARN_ON(lock_policy_rwsem_write(cpu));
cpumask_set_cpu(cpu, data->cpus);

   write_lock_irqsave(&cpufreq_driver_lock, flags);
   per_cpu(cpufreq_cpu_data, cpu) = data;
   write_unlock_irqrestore(&cpufreq_driver_lock, flags);

unlock_policy_rwsem_write(cpu);

   ret = sysfs_create_link(&cpu_dev->kobj, &data->kobj,
     "cpufreq");
   return -EINVAL;
  }

  WARN_ON(lock_policy_rwsem_write(cpu));
  update_policy_cpu(data, cpu_dev->id);
  unlock_policy_rwsem_write(cpu);
  pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
    __func__, cpu_dev->id, cpu);
}

/* If cpu is last user of policy, free policy */
if (cpus == 1) {
if (cpufreq_driver->target)
__cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT);

  lock_policy_rwsem_read(cpu);
  kobj = &data->kobj;
  cmp = &data->kobj_unregister;
  unlock_policy_rwsem_read(cpu);
  kobject_put(kobj);

  /* we need to make sure that the underlying kobj is actually
   * not referenced anymore by anybody before we proceed with
   * unloading.
   */
  pr_debug("waiting for dropping of refcount\n");
  wait_for_completion(cmp);
  pr_debug("wait complete\n");

if (cpufreq_driver->exit)
cpufreq_driver->exit(data);

  free_cpumask_var(data->related_cpus);
  free_cpumask_var(data->cpus);
  kfree(data);
} else {
  pr_debug("%s: removing link, cpu: %d\n", __func__, cpu);
  cpufreq_cpu_put(data);
  if (cpufreq_driver->target) {
   __cpufreq_governor(data, CPUFREQ_GOV_START);
   __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
  }
}

per_cpu(cpufreq_policy_cpu, cpu) = -1;
#ifdef CONFIG_HISI_RDR
if (g_rdr_cpu_on_off_hook != NULL)
g_rdr_cpu_on_off_hook(cpu, 0xff, 0);
#endif
return 0;
}

static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
unsigned int cpu = dev->id;
int retval;

if (cpu_is_offline(cpu))
return 0;

retval = __cpufreq_remove_dev(dev, sif);
return retval;
}

static void handle_update(struct work_struct *work)
{
struct cpufreq_policy *policy =
container_of(work, struct cpufreq_policy, update);
unsigned int cpu = policy->cpu;
pr_debug("handle_update for cpu %u called\n", cpu);
cpufreq_update_policy(cpu);
}

/**
* cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
* @cpu: cpu number
* @old_freq: CPU frequency the kernel thinks the CPU runs at
* @new_freq: CPU frequency the CPU actually runs at
*
* We adjust to current frequency first, and need to clean up later.
* So either call to cpufreq_update_policy() or schedule handle_update()).
*/
static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
unsigned int new_freq)
{
struct cpufreq_policy *policy;
struct cpufreq_freqs freqs;
unsigned long flags;

pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
"core thinks of %u, is %u kHz.\n", old_freq, new_freq);

freqs.old = old_freq;
freqs.new = new_freq;

read_lock_irqsave(&cpufreq_driver_lock, flags);
policy = per_cpu(cpufreq_cpu_data, cpu);
read_unlock_irqrestore(&cpufreq_driver_lock, flags);

cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
}

/**
* cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
* @cpu: CPU number
*
* This is the last known freq, without actually getting it from the driver.
* Return value will be same as what is shown in scaling_cur_freq in sysfs.
*/
unsigned int cpufreq_quick_get(unsigned int cpu)
{
struct cpufreq_policy *policy;
unsigned int ret_freq = 0;

if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
return cpufreq_driver->get(cpu);

policy = cpufreq_cpu_get(cpu);
if (policy) {
ret_freq = policy->cur;
cpufreq_cpu_put(policy);
}

return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get);

/**
* cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
* @cpu: CPU number
*
* Just return the max possible frequency for a given CPU.
*/
unsigned int cpufreq_quick_get_max(unsigned int cpu)
{
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
unsigned int ret_freq = 0;

if (policy) {
ret_freq = policy->max;
cpufreq_cpu_put(policy);
}

return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get_max);

static unsigned int __cpufreq_get(unsigned int cpu)
{
struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
unsigned int ret_freq = 0;

if (!cpufreq_driver->get)
return ret_freq;

ret_freq = cpufreq_driver->get(cpu);

if (ret_freq && policy->cur &&
  !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
  /* verify no discrepancy between actual and
     saved value exists */
  if (unlikely(ret_freq != policy->cur)) {
   cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
   schedule_work(&policy->update);
  }
}

return ret_freq;
}

/**
* cpufreq_get - get the current CPU frequency (in kHz)
* @cpu: CPU number
*
* Get the CPU current (static) CPU frequency
*/
unsigned int cpufreq_get(unsigned int cpu)
{
unsigned int ret_freq = 0;
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

if (!policy)
goto out;

if (unlikely(lock_policy_rwsem_read(cpu)))
goto out_policy;

ret_freq = __cpufreq_get(cpu);

unlock_policy_rwsem_read(cpu);

out_policy:
cpufreq_cpu_put(policy);
out:
return ret_freq;
}
EXPORT_SYMBOL(cpufreq_get);

static struct subsys_interface cpufreq_interface = {
.name = "cpufreq",
.subsys = &cpu_subsys,
.add_dev = cpufreq_add_dev,
.remove_dev = cpufreq_remove_dev,
};

/**
* cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
*
* This function is only executed for the boot processor. The other CPUs
* have been put offline by means of CPU hotplug.
*/
static int cpufreq_bp_suspend(void)
{
int ret = 0;

int cpu = smp_processor_id();
struct cpufreq_policy *cpu_policy;

pr_debug("suspending cpu %u\n", cpu);

/* If there's no policy for the boot CPU, we have nothing to do. */
cpu_policy = cpufreq_cpu_get(cpu);
if (!cpu_policy)
return 0;

if (cpufreq_driver->suspend) {
  ret = cpufreq_driver->suspend(cpu_policy);
  if (ret)
   printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
     "step on CPU %u\n", cpu_policy->cpu);
}

cpufreq_cpu_put(cpu_policy);
return ret;
}

/**
* cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
*
* 1.) resume CPUfreq hardware support (cpufreq_driver->resume())
* 2.) schedule call cpufreq_update_policy() ASAP as interrupts are
*     restored. It will verify that the current freq is in sync with
*     what we believe it to be. This is a bit later than when it
*     should be, but nonethteless it's better than calling
*     cpufreq_driver->get() here which might re-enable interrupts...
*
* This function is only executed for the boot CPU. The other CPUs have not
* been turned on yet.
*/
static void cpufreq_bp_resume(void)
{
int ret = 0;

int cpu = smp_processor_id();
struct cpufreq_policy *cpu_policy;

pr_debug("resuming cpu %u\n", cpu);

/* If there's no policy for the boot CPU, we have nothing to do. */
cpu_policy = cpufreq_cpu_get(cpu);
if (!cpu_policy)
return;

if (cpufreq_driver->resume) {
  ret = cpufreq_driver->resume(cpu_policy);
  if (ret) {
   printk(KERN_ERR "cpufreq: resume failed in ->resume "
     "step on CPU %u\n", cpu_policy->cpu);
   goto fail;
  }
}

schedule_work(&cpu_policy->update);

fail:
cpufreq_cpu_put(cpu_policy);
}

static struct syscore_ops cpufreq_syscore_ops = {
.suspend = cpufreq_bp_suspend,
.resume = cpufreq_bp_resume,
};

/**
* cpufreq_get_current_driver - return current driver's name
*
* Return the name string of the currently loaded cpufreq driver
* or NULL, if none.
*/
const char *cpufreq_get_current_driver(void)
{
if (cpufreq_driver)
return cpufreq_driver->name;

return NULL;
}
EXPORT_SYMBOL_GPL(cpufreq_get_current_driver);

/*********************************************************************
* NOTIFIER LISTS INTERFACE *
*********************************************************************/

/**
* cpufreq_register_notifier - register a driver with cpufreq
* @nb: notifier function to register
*      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
*
* Add a driver to one of two lists: either a list of drivers that
*      are notified about clock rate changes (once before and once after
*      the transition), or a list of drivers that are notified about
*      changes in cpufreq policy.
*
* This function may sleep, and has the same return conditions as
* blocking_notifier_chain_register.
*/
int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
{
int ret;

if (cpufreq_disabled())
return -EINVAL;

WARN_ON(!init_cpufreq_transition_notifier_list_called);

switch (list) {
case CPUFREQ_TRANSITION_NOTIFIER:
  ret = srcu_notifier_chain_register(
    &cpufreq_transition_notifier_list, nb);
  break;
case CPUFREQ_POLICY_NOTIFIER:
  ret = blocking_notifier_chain_register(
    &cpufreq_policy_notifier_list, nb);
  break;
default:
  ret = -EINVAL;
}

return ret;
}
EXPORT_SYMBOL(cpufreq_register_notifier);

/**
* cpufreq_unregister_notifier - unregister a driver with cpufreq
* @nb: notifier block to be unregistered
* @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
*
* Remove a driver from the CPU frequency notifier list.
*
* This function may sleep, and has the same return conditions as
* blocking_notifier_chain_unregister.
*/
int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
{
int ret;

if (cpufreq_disabled())
return -EINVAL;

switch (list) {
case CPUFREQ_TRANSITION_NOTIFIER:
  ret = srcu_notifier_chain_unregister(
    &cpufreq_transition_notifier_list, nb);
  break;
case CPUFREQ_POLICY_NOTIFIER:
  ret = blocking_notifier_chain_unregister(
    &cpufreq_policy_notifier_list, nb);
  break;
default:
  ret = -EINVAL;
}

return ret;
}
EXPORT_SYMBOL(cpufreq_unregister_notifier);

/*********************************************************************
* GOVERNORS *
*********************************************************************/

int __cpufreq_driver_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
int retval = -EINVAL;
unsigned int old_target_freq = target_freq;

if (cpufreq_disabled())
return -ENODEV;

/* Make sure that target_freq is within supported range */
if (target_freq > policy->max)
target_freq = policy->max;
if (target_freq < policy->min)
target_freq = policy->min;

pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
policy->cpu, target_freq, relation, old_target_freq);

if (target_freq == policy->cur)
return 0;

if (cpufreq_driver->target)
retval = cpufreq_driver->target(policy, target_freq, relation);

return retval;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_target);

int cpufreq_driver_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
{
int ret = -EINVAL;

policy = cpufreq_cpu_get(policy->cpu);
if (!policy)
goto no_policy;

if (unlikely(lock_policy_rwsem_write(policy->cpu)))
goto fail;

ret = __cpufreq_driver_target(policy, target_freq, relation);

unlock_policy_rwsem_write(policy->cpu);

fail:
cpufreq_cpu_put(policy);
no_policy:
return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);

int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
{
int ret = 0;

if (cpufreq_disabled())
return ret;

if (!cpufreq_driver->getavg)
return 0;

policy = cpufreq_cpu_get(policy->cpu);
if (!policy)
return -EINVAL;

ret = cpufreq_driver->getavg(policy, cpu);

cpufreq_cpu_put(policy);
return ret;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);

/*
* when "event" is CPUFREQ_GOV_LIMITS
*/

static int __cpufreq_governor(struct cpufreq_policy *policy,
unsigned int event)
{
int ret;

/* Only must be defined when default governor is known to have latency
restrictions, like e.g. conservative or ondemand.
That this is the case is already ensured in Kconfig
*/
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
struct cpufreq_governor *gov = &cpufreq_gov_performance;
#else
struct cpufreq_governor *gov = NULL;
#endif

if (policy->governor->max_transition_latency &&
     policy->cpuinfo.transition_latency >
     policy->governor->max_transition_latency) {
  if (!gov)
   return -EINVAL;
  else {
   printk(KERN_WARNING "%s governor failed, too long"
          " transition latency of HW, fallback"
          " to %s governor\n",
          policy->governor->name,
          gov->name);
   policy->governor = gov;
  }
}

if (!try_module_get(policy->governor->owner))
return -EINVAL;

pr_debug("__cpufreq_governor for CPU %u, event %u\n",
policy->cpu, event);

mutex_lock(&cpufreq_governor_lock);
if ((!policy->governor_enabled && (event == CPUFREQ_GOV_STOP)) ||
     (policy->governor_enabled && (event == CPUFREQ_GOV_START))) {
  mutex_unlock(&cpufreq_governor_lock);
  return -EBUSY;
}

if (event == CPUFREQ_GOV_STOP)
policy->governor_enabled = false;
else if (event == CPUFREQ_GOV_START)
policy->governor_enabled = true;

mutex_unlock(&cpufreq_governor_lock);

ret = policy->governor->governor(policy, event);

if (!ret) {
  if (event == CPUFREQ_GOV_POLICY_INIT)
   policy->governor->initialized++;
  else if (event == CPUFREQ_GOV_POLICY_EXIT)
   policy->governor->initialized--;
} else {
  /* Restore original values */
  mutex_lock(&cpufreq_governor_lock);
  if (event == CPUFREQ_GOV_STOP)
   policy->governor_enabled = true;
  else if (event == CPUFREQ_GOV_START)
   policy->governor_enabled = false;
  mutex_unlock(&cpufreq_governor_lock);
}

/* we keep one module reference alive for
   each CPU governed by this CPU */
if ((event != CPUFREQ_GOV_START) || ret)
  module_put(policy->governor->owner);
if ((event == CPUFREQ_GOV_STOP) && !ret)
  module_put(policy->governor->owner);

return ret;
}

int cpufreq_register_governor(struct cpufreq_governor *governor)
{
int err;

if (!governor)
return -EINVAL;

if (cpufreq_disabled())
return -ENODEV;

mutex_lock(&cpufreq_governor_mutex);

governor->initialized = 0;
err = -EBUSY;
if (__find_governor(governor->name) == NULL) {
err = 0;
list_add(&governor->governor_list, &cpufreq_governor_list);
}

mutex_unlock(&cpufreq_governor_mutex);
return err;
}
EXPORT_SYMBOL_GPL(cpufreq_register_governor);

void cpufreq_unregister_governor(struct cpufreq_governor *governor)
{
#ifdef CONFIG_HOTPLUG_CPU
int cpu;
#endif

if (!governor)
return;

if (cpufreq_disabled())
return;

#ifdef CONFIG_HOTPLUG_CPU
for_each_present_cpu(cpu) {
  if (cpu_online(cpu))
   continue;
  if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
   strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
}
#endif

mutex_lock(&cpufreq_governor_mutex);
list_del(&governor->governor_list);
mutex_unlock(&cpufreq_governor_mutex);
return;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);

/*********************************************************************
* POLICY INTERFACE *
*********************************************************************/

/**
* cpufreq_get_policy - get the current cpufreq_policy
* @policy: struct cpufreq_policy into which the current cpufreq_policy
* is written
*
* Reads the current cpufreq policy.
*/
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
{
struct cpufreq_policy *cpu_policy;
if (!policy)
return -EINVAL;

cpu_policy = cpufreq_cpu_get(cpu);
if (!cpu_policy)
return -EINVAL;

memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));

cpufreq_cpu_put(cpu_policy);
return 0;
}
EXPORT_SYMBOL(cpufreq_get_policy);

/*
* data : current policy.
* policy : policy to be set.
*/
static int __cpufreq_set_policy(struct cpufreq_policy *data,
struct cpufreq_policy *policy)
{
int ret = 0, failed = 1;

pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
policy->min, policy->max);

memcpy(&policy->cpuinfo, &data->cpuinfo,
sizeof(struct cpufreq_cpuinfo));

if (policy->min > data->max || policy->max < data->min) {
ret = -EINVAL;
goto error_out;
}

/* verify the cpu speed can be set within this limit */
ret = cpufreq_driver->verify(policy);
if (ret)
goto error_out;

/* adjust if necessary - all reasons */
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_ADJUST, policy);

/* adjust if necessary - hardware incompatibility*/
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_INCOMPATIBLE, policy);

/* verify the cpu speed can be set within this limit,
which might be different to the first one */
ret = cpufreq_driver->verify(policy);
if (ret)
goto error_out;

/* notification of the new policy */
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_NOTIFY, policy);

data->min = policy->min;
data->max = policy->max;

pr_debug("new min and max freqs are %u - %u kHz\n",
data->min, data->max);

if (cpufreq_driver->setpolicy) {
  data->policy = policy->policy;
  pr_debug("setting range\n");
  ret = cpufreq_driver->setpolicy(policy);
} else {
  if (policy->governor != data->governor) {
   /* save old, working values */
   struct cpufreq_governor *old_gov = data->governor;

pr_debug("governor switch\n");

   /* end old governor */
   if (data->governor) {
    __cpufreq_governor(data, CPUFREQ_GOV_STOP);
    unlock_policy_rwsem_write(policy->cpu);
    __cpufreq_governor(data,
      CPUFREQ_GOV_POLICY_EXIT);
    lock_policy_rwsem_write(policy->cpu);
   }

   /* start new governor */
   data->governor = policy->governor;
   if (!__cpufreq_governor(data, CPUFREQ_GOV_POLICY_INIT)) {
    if (!__cpufreq_governor(data, CPUFREQ_GOV_START)) {
     failed = 0;
    } else {
     unlock_policy_rwsem_write(policy->cpu);
     __cpufreq_governor(data,
       CPUFREQ_GOV_POLICY_EXIT);
     lock_policy_rwsem_write(policy->cpu);
    }
   }

   if (failed) {
    /* new governor failed, so re-start old one */
    pr_debug("starting governor %s failed\n",
       data->governor->name);
    if (old_gov) {
     data->governor = old_gov;
     __cpufreq_governor(data,
       CPUFREQ_GOV_POLICY_INIT);
     __cpufreq_governor(data,
          CPUFREQ_GOV_START);
    }
    ret = -EINVAL;
    goto error_out;
   }
   /* might be a policy change, too, so fall through */
  }
  pr_debug("governor: change or update limits\n");
  __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
}

error_out:
return ret;
}

/**
* cpufreq_update_policy - re-evaluate an existing cpufreq policy
* @cpu: CPU which shall be re-evaluated
*
* Useful for policy notifiers which have different necessities
* at different times.
*/
int cpufreq_update_policy(unsigned int cpu)
{
struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
struct cpufreq_policy policy;
int ret;

if (!data) {
ret = -ENODEV;
goto no_policy;
}

if (unlikely(lock_policy_rwsem_write(cpu))) {
ret = -EINVAL;
goto fail;
}

pr_debug("updating policy for CPU %u\n", cpu);
memcpy(&policy, data, sizeof(struct cpufreq_policy));
policy.min = data->user_policy.min;
policy.max = data->user_policy.max;
policy.policy = data->user_policy.policy;
policy.governor = data->user_policy.governor;

/* BIOS might change freq behind our back
   -> ask driver for current freq and notify governors about a change */
if (cpufreq_driver->get) {
  policy.cur = cpufreq_driver->get(cpu);
  if (!data->cur) {
   pr_debug("Driver did not initialize current freq");
   data->cur = policy.cur;
  } else {
   if (data->cur != policy.cur && cpufreq_driver->target)
    cpufreq_out_of_sync(cpu, data->cur,
        policy.cur);
  }
}

ret = __cpufreq_set_policy(data, &policy);

unlock_policy_rwsem_write(cpu);

fail:
cpufreq_cpu_put(data);
no_policy:
return ret;
}
EXPORT_SYMBOL(cpufreq_update_policy);

static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
struct device *dev;

dev = get_cpu_device(cpu);
if (dev) {
  switch (action) {
  case CPU_ONLINE:
  case CPU_ONLINE_FROZEN:
   cpufreq_add_dev(dev, NULL);
   kobject_uevent(&dev->kobj, KOBJ_ADD);
   break;
  case CPU_DOWN_PREPARE:
  case CPU_DOWN_PREPARE_FROZEN:
   __cpufreq_remove_dev(dev, NULL);
   break;
  case CPU_DOWN_FAILED:
  case CPU_DOWN_FAILED_FROZEN:
   cpufreq_add_dev(dev, NULL);
   break;
  }
}
return NOTIFY_OK;
}

static struct notifier_block __refdata cpufreq_cpu_notifier = {
.notifier_call = cpufreq_cpu_callback,
};

/*********************************************************************
* REGISTER / UNREGISTER CPUFREQ DRIVER *
*********************************************************************/

/**
* cpufreq_register_driver - register a CPU Frequency driver
* @driver_data: A struct cpufreq_driver containing the values#
* submitted by the CPU Frequency driver.
*
* Registers a CPU Frequency driver to this core code. This code
* returns zero on success, -EBUSY when another driver got here first
* (and isn't unregistered in the meantime).
*
*/
int cpufreq_register_driver(struct cpufreq_driver *driver_data)
{
unsigned long flags;
int ret;

if (cpufreq_disabled())
return -ENODEV;

if (!driver_data || !driver_data->verify || !driver_data->init ||
((!driver_data->setpolicy) && (!driver_data->target)))
return -EINVAL;

pr_debug("trying to register driver %s\n", driver_data->name);

if (driver_data->setpolicy)
driver_data->flags |= CPUFREQ_CONST_LOOPS;

write_lock_irqsave(&cpufreq_driver_lock, flags);
if (cpufreq_driver) {
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
return -EBUSY;
}
cpufreq_driver = driver_data;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);

ret = subsys_interface_register(&cpufreq_interface);
if (ret)
goto err_null_driver;

if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
int i;
ret = -ENODEV;

  /* check for at least one working CPU */
  for (i = 0; i < nr_cpu_ids; i++)
   if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
    ret = 0;
    break;
   }

  /* if all ->init() calls failed, unregister */
  if (ret) {
   pr_debug("no CPU initialized for driver %s\n",
       driver_data->name);
   goto err_if_unreg;
  }
}

register_hotcpu_notifier(&cpufreq_cpu_notifier);
pr_debug("driver %s up and running\n", driver_data->name);

return 0;
err_if_unreg:
subsys_interface_unregister(&cpufreq_interface);
err_null_driver:
write_lock_irqsave(&cpufreq_driver_lock, flags);
cpufreq_driver = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);

/**
* cpufreq_unregister_driver - unregister the current CPUFreq driver
*
* Unregister the current CPUFreq driver. Only call this if you have
* the right to do so, i.e. if you have succeeded in initialising before!
* Returns zero if successful, and -EINVAL if the cpufreq_driver is
* currently not initialised.
*/
int cpufreq_unregister_driver(struct cpufreq_driver *driver)
{
unsigned long flags;

if (!cpufreq_driver || (driver != cpufreq_driver))
return -EINVAL;

pr_debug("unregistering driver %s\n", driver->name);

subsys_interface_unregister(&cpufreq_interface);
unregister_hotcpu_notifier(&cpufreq_cpu_notifier);

write_lock_irqsave(&cpufreq_driver_lock, flags);
cpufreq_driver = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);

return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);

static int __init cpufreq_core_init(void)
{
int cpu;

if (cpufreq_disabled())
return -ENODEV;

for_each_possible_cpu(cpu) {
per_cpu(cpufreq_policy_cpu, cpu) = -1;
init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
}

cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
BUG_ON(!cpufreq_global_kobject);
register_syscore_ops(&cpufreq_syscore_ops);

return 0;
}
core_initcall(cpufreq_core_init);