linux-kernel-cpufreq.c_2

本文深入探讨了CPU频率调节的原理与实现机制,详细解释了如何通过内核API进行频率调整,包括设置频率范围、获取当前频率以及使用不同的频率策略。文章还介绍了通知机制、频率同步处理和频率策略切换等关键概念。

static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
{
 int j;

 policy->last_cpu = policy->cpu;
 policy->cpu = cpu;

 for_each_cpu(j, policy->cpus)
  per_cpu(cpufreq_policy_cpu, j) = cpu;

#ifdef CONFIG_CPU_FREQ_TABLE
 cpufreq_frequency_table_update_policy_cpu(policy);
#endif
 blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
   CPUFREQ_UPDATE_POLICY_CPU, policy);
}

/**
 * __cpufreq_remove_dev - remove a CPU device
 *
 * Removes the cpufreq interface for a CPU device.
 * Caller should already have policy_rwsem in write mode for this CPU.
 * This routine frees the rwsem before returning.
 */
static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
 unsigned int cpu = dev->id, ret, cpus;
 unsigned long flags;
 struct cpufreq_policy *data;
 struct kobject *kobj;
 struct completion *cmp;
 struct device *cpu_dev;

 pr_debug("%s: unregistering CPU %u\n", __func__, cpu);

 write_lock_irqsave(&cpufreq_driver_lock, flags);

 data = per_cpu(cpufreq_cpu_data, cpu);
 per_cpu(cpufreq_cpu_data, cpu) = NULL;

 write_unlock_irqrestore(&cpufreq_driver_lock, flags);

 if (!data) {
  pr_debug("%s: No cpu_data found\n", __func__);
  return -EINVAL;
 }

 if (cpufreq_driver->target)
  __cpufreq_governor(data, CPUFREQ_GOV_STOP);

#ifdef CONFIG_HOTPLUG_CPU
 if (!cpufreq_driver->setpolicy)
  strncpy(per_cpu(cpufreq_cpu_governor, cpu),
   data->governor->name, CPUFREQ_NAME_LEN);
#endif

 WARN_ON(lock_policy_rwsem_write(cpu));
 cpus = cpumask_weight(data->cpus);

 if (cpus > 1)
  cpumask_clear_cpu(cpu, data->cpus);
 unlock_policy_rwsem_write(cpu);

 if (cpu != data->cpu) {
  sysfs_remove_link(&dev->kobj, "cpufreq");
 } else if (cpus > 1) {
  /* first sibling now owns the new sysfs dir */
  cpu_dev = get_cpu_device(cpumask_first(data->cpus));
  sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
  ret = kobject_move(&data->kobj, &cpu_dev->kobj);
  if (ret) {
   pr_err("%s: Failed to move kobj: %d", __func__, ret);

   WARN_ON(lock_policy_rwsem_write(cpu));
   cpumask_set_cpu(cpu, data->cpus);

   write_lock_irqsave(&cpufreq_driver_lock, flags);
   per_cpu(cpufreq_cpu_data, cpu) = data;
   write_unlock_irqrestore(&cpufreq_driver_lock, flags);

   unlock_policy_rwsem_write(cpu);

   ret = sysfs_create_link(&cpu_dev->kobj, &data->kobj,
     "cpufreq");
   return -EINVAL;
  }

  WARN_ON(lock_policy_rwsem_write(cpu));
  update_policy_cpu(data, cpu_dev->id);
  unlock_policy_rwsem_write(cpu);
  pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
    __func__, cpu_dev->id, cpu);
 }

 /* If cpu is last user of policy, free policy */
 if (cpus == 1) {
  if (cpufreq_driver->target)
   __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT);

  lock_policy_rwsem_read(cpu);
  kobj = &data->kobj;
  cmp = &data->kobj_unregister;
  unlock_policy_rwsem_read(cpu);
  kobject_put(kobj);

  /* we need to make sure that the underlying kobj is actually
   * not referenced anymore by anybody before we proceed with
   * unloading.
   */
  pr_debug("waiting for dropping of refcount\n");
  wait_for_completion(cmp);
  pr_debug("wait complete\n");

  if (cpufreq_driver->exit)
   cpufreq_driver->exit(data);

  free_cpumask_var(data->related_cpus);
  free_cpumask_var(data->cpus);
  kfree(data);
 } else {
  pr_debug("%s: removing link, cpu: %d\n", __func__, cpu);
  cpufreq_cpu_put(data);
  if (cpufreq_driver->target) {
   __cpufreq_governor(data, CPUFREQ_GOV_START);
   __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
  }
 }

 per_cpu(cpufreq_policy_cpu, cpu) = -1;
#ifdef CONFIG_HISI_RDR
 if (g_rdr_cpu_on_off_hook != NULL)
  g_rdr_cpu_on_off_hook(cpu, 0xff, 0);
#endif
 return 0;
}


static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
 unsigned int cpu = dev->id;
 int retval;

 if (cpu_is_offline(cpu))
  return 0;

 retval = __cpufreq_remove_dev(dev, sif);
 return retval;
}


static void handle_update(struct work_struct *work)
{
 struct cpufreq_policy *policy =
  container_of(work, struct cpufreq_policy, update);
 unsigned int cpu = policy->cpu;
 pr_debug("handle_update for cpu %u called\n", cpu);
 cpufreq_update_policy(cpu);
}

/**
 * cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
 * @cpu: cpu number
 * @old_freq: CPU frequency the kernel thinks the CPU runs at
 * @new_freq: CPU frequency the CPU actually runs at
 *
 * We adjust to current frequency first, and need to clean up later.
 * So either call to cpufreq_update_policy() or schedule handle_update()).
 */
static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
    unsigned int new_freq)
{
 struct cpufreq_policy *policy;
 struct cpufreq_freqs freqs;
 unsigned long flags;


 pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
        "core thinks of %u, is %u kHz.\n", old_freq, new_freq);

 freqs.old = old_freq;
 freqs.new = new_freq;

 read_lock_irqsave(&cpufreq_driver_lock, flags);
 policy = per_cpu(cpufreq_cpu_data, cpu);
 read_unlock_irqrestore(&cpufreq_driver_lock, flags);

 cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
 cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
}


/**
 * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
 * @cpu: CPU number
 *
 * This is the last known freq, without actually getting it from the driver.
 * Return value will be same as what is shown in scaling_cur_freq in sysfs.
 */
unsigned int cpufreq_quick_get(unsigned int cpu)
{
 struct cpufreq_policy *policy;
 unsigned int ret_freq = 0;

 if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
  return cpufreq_driver->get(cpu);

 policy = cpufreq_cpu_get(cpu);
 if (policy) {
  ret_freq = policy->cur;
  cpufreq_cpu_put(policy);
 }

 return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get);

/**
 * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
 * @cpu: CPU number
 *
 * Just return the max possible frequency for a given CPU.
 */
unsigned int cpufreq_quick_get_max(unsigned int cpu)
{
 struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 unsigned int ret_freq = 0;

 if (policy) {
  ret_freq = policy->max;
  cpufreq_cpu_put(policy);
 }

 return ret_freq;
}
EXPORT_SYMBOL(cpufreq_quick_get_max);


static unsigned int __cpufreq_get(unsigned int cpu)
{
 struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
 unsigned int ret_freq = 0;

 if (!cpufreq_driver->get)
  return ret_freq;

 ret_freq = cpufreq_driver->get(cpu);

 if (ret_freq && policy->cur &&
  !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
  /* verify no discrepancy between actual and
     saved value exists */
  if (unlikely(ret_freq != policy->cur)) {
   cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
   schedule_work(&policy->update);
  }
 }

 return ret_freq;
}

/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the CPU current (static) CPU frequency
 */
unsigned int cpufreq_get(unsigned int cpu)
{
 unsigned int ret_freq = 0;
 struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);

 if (!policy)
  goto out;

 if (unlikely(lock_policy_rwsem_read(cpu)))
  goto out_policy;

 ret_freq = __cpufreq_get(cpu);

 unlock_policy_rwsem_read(cpu);

out_policy:
 cpufreq_cpu_put(policy);
out:
 return ret_freq;
}
EXPORT_SYMBOL(cpufreq_get);

static struct subsys_interface cpufreq_interface = {
 .name  = "cpufreq",
 .subsys  = &cpu_subsys,
 .add_dev = cpufreq_add_dev,
 .remove_dev = cpufreq_remove_dev,
};


/**
 * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
 *
 * This function is only executed for the boot processor.  The other CPUs
 * have been put offline by means of CPU hotplug.
 */
static int cpufreq_bp_suspend(void)
{
 int ret = 0;

 int cpu = smp_processor_id();
 struct cpufreq_policy *cpu_policy;

 pr_debug("suspending cpu %u\n", cpu);

 /* If there's no policy for the boot CPU, we have nothing to do. */
 cpu_policy = cpufreq_cpu_get(cpu);
 if (!cpu_policy)
  return 0;

 if (cpufreq_driver->suspend) {
  ret = cpufreq_driver->suspend(cpu_policy);
  if (ret)
   printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
     "step on CPU %u\n", cpu_policy->cpu);
 }

 cpufreq_cpu_put(cpu_policy);
 return ret;
}

/**
 * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
 *
 * 1.) resume CPUfreq hardware support (cpufreq_driver->resume())
 * 2.) schedule call cpufreq_update_policy() ASAP as interrupts are
 *     restored. It will verify that the current freq is in sync with
 *     what we believe it to be. This is a bit later than when it
 *     should be, but nonethteless it's better than calling
 *     cpufreq_driver->get() here which might re-enable interrupts...
 *
 * This function is only executed for the boot CPU.  The other CPUs have not
 * been turned on yet.
 */
static void cpufreq_bp_resume(void)
{
 int ret = 0;

 int cpu = smp_processor_id();
 struct cpufreq_policy *cpu_policy;

 pr_debug("resuming cpu %u\n", cpu);

 /* If there's no policy for the boot CPU, we have nothing to do. */
 cpu_policy = cpufreq_cpu_get(cpu);
 if (!cpu_policy)
  return;

 if (cpufreq_driver->resume) {
  ret = cpufreq_driver->resume(cpu_policy);
  if (ret) {
   printk(KERN_ERR "cpufreq: resume failed in ->resume "
     "step on CPU %u\n", cpu_policy->cpu);
   goto fail;
  }
 }

 schedule_work(&cpu_policy->update);

fail:
 cpufreq_cpu_put(cpu_policy);
}

static struct syscore_ops cpufreq_syscore_ops = {
 .suspend = cpufreq_bp_suspend,
 .resume  = cpufreq_bp_resume,
};

/**
 * cpufreq_get_current_driver - return current driver's name
 *
 * Return the name string of the currently loaded cpufreq driver
 * or NULL, if none.
 */
const char *cpufreq_get_current_driver(void)
{
 if (cpufreq_driver)
  return cpufreq_driver->name;

 return NULL;
}
EXPORT_SYMBOL_GPL(cpufreq_get_current_driver);

/*********************************************************************
 *                     NOTIFIER LISTS INTERFACE                      *
 *********************************************************************/

/**
 * cpufreq_register_notifier - register a driver with cpufreq
 * @nb: notifier function to register
 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 * Add a driver to one of two lists: either a list of drivers that
 *      are notified about clock rate changes (once before and once after
 *      the transition), or a list of drivers that are notified about
 *      changes in cpufreq policy.
 *
 * This function may sleep, and has the same return conditions as
 * blocking_notifier_chain_register.
 */
int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
{
 int ret;

 if (cpufreq_disabled())
  return -EINVAL;

 WARN_ON(!init_cpufreq_transition_notifier_list_called);

 switch (list) {
 case CPUFREQ_TRANSITION_NOTIFIER:
  ret = srcu_notifier_chain_register(
    &cpufreq_transition_notifier_list, nb);
  break;
 case CPUFREQ_POLICY_NOTIFIER:
  ret = blocking_notifier_chain_register(
    &cpufreq_policy_notifier_list, nb);
  break;
 default:
  ret = -EINVAL;
 }

 return ret;
}
EXPORT_SYMBOL(cpufreq_register_notifier);


/**
 * cpufreq_unregister_notifier - unregister a driver with cpufreq
 * @nb: notifier block to be unregistered
 *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
 *
 * Remove a driver from the CPU frequency notifier list.
 *
 * This function may sleep, and has the same return conditions as
 * blocking_notifier_chain_unregister.
 */
int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
{
 int ret;

 if (cpufreq_disabled())
  return -EINVAL;

 switch (list) {
 case CPUFREQ_TRANSITION_NOTIFIER:
  ret = srcu_notifier_chain_unregister(
    &cpufreq_transition_notifier_list, nb);
  break;
 case CPUFREQ_POLICY_NOTIFIER:
  ret = blocking_notifier_chain_unregister(
    &cpufreq_policy_notifier_list, nb);
  break;
 default:
  ret = -EINVAL;
 }

 return ret;
}
EXPORT_SYMBOL(cpufreq_unregister_notifier);


/*********************************************************************
 *                              GOVERNORS                            *
 *********************************************************************/


int __cpufreq_driver_target(struct cpufreq_policy *policy,
       unsigned int target_freq,
       unsigned int relation)
{
 int retval = -EINVAL;
 unsigned int old_target_freq = target_freq;

 if (cpufreq_disabled())
  return -ENODEV;

 /* Make sure that target_freq is within supported range */
 if (target_freq > policy->max)
  target_freq = policy->max;
 if (target_freq < policy->min)
  target_freq = policy->min;

 pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
   policy->cpu, target_freq, relation, old_target_freq);

 if (target_freq == policy->cur)
  return 0;

 if (cpufreq_driver->target)
  retval = cpufreq_driver->target(policy, target_freq, relation);

 return retval;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_target);

int cpufreq_driver_target(struct cpufreq_policy *policy,
     unsigned int target_freq,
     unsigned int relation)
{
 int ret = -EINVAL;

 policy = cpufreq_cpu_get(policy->cpu);
 if (!policy)
  goto no_policy;

 if (unlikely(lock_policy_rwsem_write(policy->cpu)))
  goto fail;

 ret = __cpufreq_driver_target(policy, target_freq, relation);

 unlock_policy_rwsem_write(policy->cpu);

fail:
 cpufreq_cpu_put(policy);
no_policy:
 return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_driver_target);

int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
{
 int ret = 0;

 if (cpufreq_disabled())
  return ret;

 if (!cpufreq_driver->getavg)
  return 0;

 policy = cpufreq_cpu_get(policy->cpu);
 if (!policy)
  return -EINVAL;

 ret = cpufreq_driver->getavg(policy, cpu);

 cpufreq_cpu_put(policy);
 return ret;
}
EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);

/*
 * when "event" is CPUFREQ_GOV_LIMITS
 */

static int __cpufreq_governor(struct cpufreq_policy *policy,
     unsigned int event)
{
 int ret;

 /* Only must be defined when default governor is known to have latency
    restrictions, like e.g. conservative or ondemand.
    That this is the case is already ensured in Kconfig
 */
#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
 struct cpufreq_governor *gov = &cpufreq_gov_performance;
#else
 struct cpufreq_governor *gov = NULL;
#endif

 if (policy->governor->max_transition_latency &&
     policy->cpuinfo.transition_latency >
     policy->governor->max_transition_latency) {
  if (!gov)
   return -EINVAL;
  else {
   printk(KERN_WARNING "%s governor failed, too long"
          " transition latency of HW, fallback"
          " to %s governor\n",
          policy->governor->name,
          gov->name);
   policy->governor = gov;
  }
 }

 if (!try_module_get(policy->governor->owner))
  return -EINVAL;

 pr_debug("__cpufreq_governor for CPU %u, event %u\n",
      policy->cpu, event);

 mutex_lock(&cpufreq_governor_lock);
 if ((!policy->governor_enabled && (event == CPUFREQ_GOV_STOP)) ||
     (policy->governor_enabled && (event == CPUFREQ_GOV_START))) {
  mutex_unlock(&cpufreq_governor_lock);
  return -EBUSY;
 }

 if (event == CPUFREQ_GOV_STOP)
  policy->governor_enabled = false;
 else if (event == CPUFREQ_GOV_START)
  policy->governor_enabled = true;

 mutex_unlock(&cpufreq_governor_lock);

 ret = policy->governor->governor(policy, event);

 if (!ret) {
  if (event == CPUFREQ_GOV_POLICY_INIT)
   policy->governor->initialized++;
  else if (event == CPUFREQ_GOV_POLICY_EXIT)
   policy->governor->initialized--;
 } else {
  /* Restore original values */
  mutex_lock(&cpufreq_governor_lock);
  if (event == CPUFREQ_GOV_STOP)
   policy->governor_enabled = true;
  else if (event == CPUFREQ_GOV_START)
   policy->governor_enabled = false;
  mutex_unlock(&cpufreq_governor_lock);
 }

 /* we keep one module reference alive for
   each CPU governed by this CPU */
 if ((event != CPUFREQ_GOV_START) || ret)
  module_put(policy->governor->owner);
 if ((event == CPUFREQ_GOV_STOP) && !ret)
  module_put(policy->governor->owner);

 return ret;
}


int cpufreq_register_governor(struct cpufreq_governor *governor)
{
 int err;

 if (!governor)
  return -EINVAL;

 if (cpufreq_disabled())
  return -ENODEV;

 mutex_lock(&cpufreq_governor_mutex);

 governor->initialized = 0;
 err = -EBUSY;
 if (__find_governor(governor->name) == NULL) {
  err = 0;
  list_add(&governor->governor_list, &cpufreq_governor_list);
 }

 mutex_unlock(&cpufreq_governor_mutex);
 return err;
}
EXPORT_SYMBOL_GPL(cpufreq_register_governor);


void cpufreq_unregister_governor(struct cpufreq_governor *governor)
{
#ifdef CONFIG_HOTPLUG_CPU
 int cpu;
#endif

 if (!governor)
  return;

 if (cpufreq_disabled())
  return;

#ifdef CONFIG_HOTPLUG_CPU
 for_each_present_cpu(cpu) {
  if (cpu_online(cpu))
   continue;
  if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
   strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
 }
#endif

 mutex_lock(&cpufreq_governor_mutex);
 list_del(&governor->governor_list);
 mutex_unlock(&cpufreq_governor_mutex);
 return;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);

 

/*********************************************************************
 *                          POLICY INTERFACE                         *
 *********************************************************************/

/**
 * cpufreq_get_policy - get the current cpufreq_policy
 * @policy: struct cpufreq_policy into which the current cpufreq_policy
 * is written
 *
 * Reads the current cpufreq policy.
 */
int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
{
 struct cpufreq_policy *cpu_policy;
 if (!policy)
  return -EINVAL;

 cpu_policy = cpufreq_cpu_get(cpu);
 if (!cpu_policy)
  return -EINVAL;

 memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));

 cpufreq_cpu_put(cpu_policy);
 return 0;
}
EXPORT_SYMBOL(cpufreq_get_policy);


/*
 * data   : current policy.
 * policy : policy to be set.
 */
static int __cpufreq_set_policy(struct cpufreq_policy *data,
    struct cpufreq_policy *policy)
{
 int ret = 0, failed = 1;

 pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
  policy->min, policy->max);

 memcpy(&policy->cpuinfo, &data->cpuinfo,
    sizeof(struct cpufreq_cpuinfo));

 if (policy->min > data->max || policy->max < data->min) {
  ret = -EINVAL;
  goto error_out;
 }

 /* verify the cpu speed can be set within this limit */
 ret = cpufreq_driver->verify(policy);
 if (ret)
  goto error_out;

 /* adjust if necessary - all reasons */
 blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
   CPUFREQ_ADJUST, policy);

 /* adjust if necessary - hardware incompatibility*/
 blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
   CPUFREQ_INCOMPATIBLE, policy);

 /* verify the cpu speed can be set within this limit,
    which might be different to the first one */
 ret = cpufreq_driver->verify(policy);
 if (ret)
  goto error_out;

 /* notification of the new policy */
 blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
   CPUFREQ_NOTIFY, policy);

 data->min = policy->min;
 data->max = policy->max;

 pr_debug("new min and max freqs are %u - %u kHz\n",
     data->min, data->max);

 if (cpufreq_driver->setpolicy) {
  data->policy = policy->policy;
  pr_debug("setting range\n");
  ret = cpufreq_driver->setpolicy(policy);
 } else {
  if (policy->governor != data->governor) {
   /* save old, working values */
   struct cpufreq_governor *old_gov = data->governor;

   pr_debug("governor switch\n");

   /* end old governor */
   if (data->governor) {
    __cpufreq_governor(data, CPUFREQ_GOV_STOP);
    unlock_policy_rwsem_write(policy->cpu);
    __cpufreq_governor(data,
      CPUFREQ_GOV_POLICY_EXIT);
    lock_policy_rwsem_write(policy->cpu);
   }

   /* start new governor */
   data->governor = policy->governor;
   if (!__cpufreq_governor(data, CPUFREQ_GOV_POLICY_INIT)) {
    if (!__cpufreq_governor(data, CPUFREQ_GOV_START)) {
     failed = 0;
    } else {
     unlock_policy_rwsem_write(policy->cpu);
     __cpufreq_governor(data,
       CPUFREQ_GOV_POLICY_EXIT);
     lock_policy_rwsem_write(policy->cpu);
    }
   }

   if (failed) {
    /* new governor failed, so re-start old one */
    pr_debug("starting governor %s failed\n",
       data->governor->name);
    if (old_gov) {
     data->governor = old_gov;
     __cpufreq_governor(data,
       CPUFREQ_GOV_POLICY_INIT);
     __cpufreq_governor(data,
          CPUFREQ_GOV_START);
    }
    ret = -EINVAL;
    goto error_out;
   }
   /* might be a policy change, too, so fall through */
  }
  pr_debug("governor: change or update limits\n");
  __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
 }

error_out:
 return ret;
}

/**
 * cpufreq_update_policy - re-evaluate an existing cpufreq policy
 * @cpu: CPU which shall be re-evaluated
 *
 * Useful for policy notifiers which have different necessities
 * at different times.
 */
int cpufreq_update_policy(unsigned int cpu)
{
 struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
 struct cpufreq_policy policy;
 int ret;

 if (!data) {
  ret = -ENODEV;
  goto no_policy;
 }

 if (unlikely(lock_policy_rwsem_write(cpu))) {
  ret = -EINVAL;
  goto fail;
 }

 pr_debug("updating policy for CPU %u\n", cpu);
 memcpy(&policy, data, sizeof(struct cpufreq_policy));
 policy.min = data->user_policy.min;
 policy.max = data->user_policy.max;
 policy.policy = data->user_policy.policy;
 policy.governor = data->user_policy.governor;

 /* BIOS might change freq behind our back
   -> ask driver for current freq and notify governors about a change */
 if (cpufreq_driver->get) {
  policy.cur = cpufreq_driver->get(cpu);
  if (!data->cur) {
   pr_debug("Driver did not initialize current freq");
   data->cur = policy.cur;
  } else {
   if (data->cur != policy.cur && cpufreq_driver->target)
    cpufreq_out_of_sync(cpu, data->cur,
        policy.cur);
  }
 }

 ret = __cpufreq_set_policy(data, &policy);

 unlock_policy_rwsem_write(cpu);

fail:
 cpufreq_cpu_put(data);
no_policy:
 return ret;
}
EXPORT_SYMBOL(cpufreq_update_policy);

static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
     unsigned long action, void *hcpu)
{
 unsigned int cpu = (unsigned long)hcpu;
 struct device *dev;

 dev = get_cpu_device(cpu);
 if (dev) {
  switch (action) {
  case CPU_ONLINE:
  case CPU_ONLINE_FROZEN:
   cpufreq_add_dev(dev, NULL);
   kobject_uevent(&dev->kobj, KOBJ_ADD);
   break;
  case CPU_DOWN_PREPARE:
  case CPU_DOWN_PREPARE_FROZEN:
   __cpufreq_remove_dev(dev, NULL);
   break;
  case CPU_DOWN_FAILED:
  case CPU_DOWN_FAILED_FROZEN:
   cpufreq_add_dev(dev, NULL);
   break;
  }
 }
 return NOTIFY_OK;
}

static struct notifier_block __refdata cpufreq_cpu_notifier = {
    .notifier_call = cpufreq_cpu_callback,
};

/*********************************************************************
 *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
 *********************************************************************/

/**
 * cpufreq_register_driver - register a CPU Frequency driver
 * @driver_data: A struct cpufreq_driver containing the values#
 * submitted by the CPU Frequency driver.
 *
 *   Registers a CPU Frequency driver to this core code. This code
 * returns zero on success, -EBUSY when another driver got here first
 * (and isn't unregistered in the meantime).
 *
 */
int cpufreq_register_driver(struct cpufreq_driver *driver_data)
{
 unsigned long flags;
 int ret;

 if (cpufreq_disabled())
  return -ENODEV;

 if (!driver_data || !driver_data->verify || !driver_data->init ||
     ((!driver_data->setpolicy) && (!driver_data->target)))
  return -EINVAL;

 pr_debug("trying to register driver %s\n", driver_data->name);

 if (driver_data->setpolicy)
  driver_data->flags |= CPUFREQ_CONST_LOOPS;

 write_lock_irqsave(&cpufreq_driver_lock, flags);
 if (cpufreq_driver) {
  write_unlock_irqrestore(&cpufreq_driver_lock, flags);
  return -EBUSY;
 }
 cpufreq_driver = driver_data;
 write_unlock_irqrestore(&cpufreq_driver_lock, flags);

 ret = subsys_interface_register(&cpufreq_interface);
 if (ret)
  goto err_null_driver;

 if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
  int i;
  ret = -ENODEV;

  /* check for at least one working CPU */
  for (i = 0; i < nr_cpu_ids; i++)
   if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
    ret = 0;
    break;
   }

  /* if all ->init() calls failed, unregister */
  if (ret) {
   pr_debug("no CPU initialized for driver %s\n",
       driver_data->name);
   goto err_if_unreg;
  }
 }

 register_hotcpu_notifier(&cpufreq_cpu_notifier);
 pr_debug("driver %s up and running\n", driver_data->name);

 return 0;
err_if_unreg:
 subsys_interface_unregister(&cpufreq_interface);
err_null_driver:
 write_lock_irqsave(&cpufreq_driver_lock, flags);
 cpufreq_driver = NULL;
 write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);


/**
 * cpufreq_unregister_driver - unregister the current CPUFreq driver
 *
 *    Unregister the current CPUFreq driver. Only call this if you have
 * the right to do so, i.e. if you have succeeded in initialising before!
 * Returns zero if successful, and -EINVAL if the cpufreq_driver is
 * currently not initialised.
 */
int cpufreq_unregister_driver(struct cpufreq_driver *driver)
{
 unsigned long flags;

 if (!cpufreq_driver || (driver != cpufreq_driver))
  return -EINVAL;

 pr_debug("unregistering driver %s\n", driver->name);

 subsys_interface_unregister(&cpufreq_interface);
 unregister_hotcpu_notifier(&cpufreq_cpu_notifier);

 write_lock_irqsave(&cpufreq_driver_lock, flags);
 cpufreq_driver = NULL;
 write_unlock_irqrestore(&cpufreq_driver_lock, flags);

 return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);

static int __init cpufreq_core_init(void)
{
 int cpu;

 if (cpufreq_disabled())
  return -ENODEV;

 for_each_possible_cpu(cpu) {
  per_cpu(cpufreq_policy_cpu, cpu) = -1;
  init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
 }

 cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
 BUG_ON(!cpufreq_global_kobject);
 register_syscore_ops(&cpufreq_syscore_ops);

 return 0;
}
core_initcall(cpufreq_core_init);

``` [root@190f3c453709 inference]# python nf4.py /usr/local/python3.10.17/lib/python3.10/site-packages/torch_npu/utils/storage.py:38: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage() if self.device.type != 'cpu': Some weights of PanguForCausalLM were not initialized from the model checkpoint at /models/z50051264/checkpoints and are newly initialized: ['model.layers.0.self_attn.rotary_emb.inv_freq', 'model.layers.1.self_attn.rotary_emb.inv_freq', 'model.layers.10.self_attn.rotary_emb.inv_freq', 'model.layers.11.self_attn.rotary_emb.inv_freq', 'model.layers.12.self_attn.rotary_emb.inv_freq', 'model.layers.13.self_attn.rotary_emb.inv_freq', 'model.layers.14.self_attn.rotary_emb.inv_freq', 'model.layers.15.self_attn.rotary_emb.inv_freq', 'model.layers.16.self_attn.rotary_emb.inv_freq', 'model.layers.17.self_attn.rotary_emb.inv_freq', 'model.layers.18.self_attn.rotary_emb.inv_freq', 'model.layers.19.self_attn.rotary_emb.inv_freq', 'model.layers.2.self_attn.rotary_emb.inv_freq', 'model.layers.20.self_attn.rotary_emb.inv_freq', 'model.layers.21.self_attn.rotary_emb.inv_freq', 'model.layers.22.self_attn.rotary_emb.inv_freq', 'model.layers.23.self_attn.rotary_emb.inv_freq', 'model.layers.24.self_attn.rotary_emb.inv_freq', 'model.layers.25.self_attn.rotary_emb.inv_freq', 'model.layers.26.self_attn.rotary_emb.inv_freq', 'model.layers.27.self_attn.rotary_emb.inv_freq', 'model.layers.3.self_attn.rotary_emb.inv_freq', 'model.layers.4.self_attn.rotary_emb.inv_freq', 'model.layers.5.self_attn.rotary_emb.inv_freq', 'model.layers.6.self_attn.rotary_emb.inv_freq', 'model.layers.7.self_attn.rotary_emb.inv_freq', 'model.layers.8.self_attn.rotary_emb.inv_freq', 'model.layers.9.self_attn.rotary_emb.inv_freq'] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. *****************模型加载成功! ****************[+] load time: 27.4145s 模型词汇量: 153376 Tokenizer词汇量: 153376 unk_token: <unk> pad_token: None <s>I love Hugging Face! *****************分词器加载成功,开始推理! [+] inference time: 5.57427s [' <s> 你是谁?你要我提供什么类型的内容?\n\n**回答者:人工智能助手\n\n问题有什么可以为我服务的呢?\n?\n在\n?\n\n?\n\n## \n是吗?你是一种智能机器人么 AI, [unused10]'] [root@190f3c453709 inference]# python -m pdb nf4.py > /models/z50051264/bitsandbytes-pangu/examples/inference/nf4.py(1)<module>() -> import time (Pdb) n > /models/z50051264/bitsandbytes-pangu/examples/inference/nf4.py(2)<module>() -> import torch, torch_npu (Pdb) n > /models/z50051264/bitsandbytes-pangu/examples/inference/nf4.py(3)<module>() -> from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig (Pdb) > /models/z50051264/bitsandbytes-pangu/examples/inference/nf4.py(9)<module>() -> MODEL_PATH = "/models/z50051264/checkpoints" (Pdb) > /models/z50051264/bitsandbytes-pangu/examples/inference/nf4.py(11)<module>() -> bnb_config = BitsAndBytesConfig( (Pdb) > /models/z50051264/bitsandbytes-pangu/examples/inference/nf4.py(12)<module>() -> load_in_4bit=True, (Pdb) > /models/z50051264/bitsandbytes-pangu/examples/inference/nf4.py(13)<module>() -> bnb_4bit_compute_dtype=torch.bfloat16, # Support torch.float16, torch.float32, torch.bfloat16 (Pdb) > /models/z50051264/bitsandbytes-pangu/examples/inference/nf4.py(14)<module>() -> bnb_4bit_quant_type="nf4", # # Only support `nf4` (Pdb) > /models/z50051264/bitsandbytes-pangu/examples/inference/nf4.py(15)<module>() -> bnb_4bit_use_double_quant=False (Pdb) > /models/z50051264/bitsandbytes-pangu/examples/inference/nf4.py(11)<module>() -> bnb_config = BitsAndBytesConfig( (Pdb) > /models/z50051264/bitsandbytes-pangu/examples/inference/nf4.py(18)<module>() -> torch.npu.synchronize() (Pdb) RuntimeError: SetPrecisionMode:build/CMakeFiles/torch_npu.dir/compiler_depend.ts:156 NPU function error: at_npu::native::AclSetCompileopt(aclCompileOpt::ACL_PRECISION_MODE, precision_mode), error code is 500001 [ERROR] 2025-07-30-07:29:05 (PID:1957, Device:0, RankID:-1) ERR00100 PTA call acl api failed [Error]: The internal ACL of the system is incorrect. Rectify the fault based on the error information in the ascend log. E90000: [PID: 1957] 2025-07-30-07:29:05.549.359 Compile operator failed, cause: module '__main__' has no attribute '__spec__' File "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/tbe/common/repository_manager/interface.py", line 33, in cann_kb_init return RouteServer.initialize(**locals()) File "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/tbe/common/repository_manager/route.py", line 54, in wrapper return func(cls, *args, **kwargs) File "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/tbe/common/repository_manager/route.py", line 169, in initialize main_mod, main_path = config_main_info() File "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/tbe/common/repository_manager/utils/common.py", line 37, in config_main_info main_module_name = getattr(main_module.__spec__, "name", None) TraceBack (most recent call last): AOE Failed to call InitCannKB[FUNC:Initialize][FILE:python_adapter_manager.cc][LINE:47] Failed to initialize TeConfigInfo. [GraphOpt][InitializeInner][InitTbeFunc] Failed to init tbe.[FUNC:InitializeTeFusion][FILE:tbe_op_store_adapter.cc][LINE:1889] [GraphOpt][InitializeInner][InitTeFusion]: Failed to initialize TeFusion.[FUNC:InitializeInner][FILE:tbe_op_store_adapter.cc][LINE:1856] [SubGraphOpt][PreCompileOp][InitAdapter] InitializeAdapter adapter [tbe_op_adapter] failed! Ret [4294967295][FUNC:InitializeAdapter][FILE:op_store_adapter_manager.cc][LINE:79] [SubGraphOpt][PreCompileOp][Init] Initialize op store adapter failed, OpsStoreName[tbe-custom].[FUNC:Initialize][FILE:op_store_adapter_manager.cc][LINE:120] [FusionMngr][Init] Op store adapter manager init failed.[FUNC:Initialize][FILE:fusion_manager.cc][LINE:115] PluginManager InvokeAll failed.[FUNC:Initialize][FILE:ops_kernel_manager.cc][LINE:83] OpsManager initialize failed.[FUNC:InnerInitialize][FILE:gelib.cc][LINE:259] GELib::InnerInitialize failed.[FUNC:Initialize][FILE:gelib.cc][LINE:184] GEInitialize failed.[FUNC:GEInitialize][FILE:ge_api.cc][LINE:371] [Initialize][Ge]GEInitialize failed. ge result = 4294967295[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161] [Init][Compiler]Init compiler failed[FUNC:ReportInnerError][FILE:log_inner.cpp][LINE:145] [Set][Options]OpCompileProcessor init failed![FUNC:ReportInnerError][FILE:log_inner.cpp][LINE:145] > /models/z50051264/bitsandbytes-pangu/examples/inference/nf4.py(18)<module>() -> torch.npu.synchronize() (Pdb) ``` 为什么我直接运行没问题,但是使用pdb调试就会报错???
最新发布
07-31
<think>我们面对的问题:为什么在使用 pdb 调试 Python 脚本时出现 RuntimeError: SetPrecisionMode 错误,而直接运行却没有问题? 根据用户描述,他们在使用pdb调试时遇到了一个RuntimeError,错误信息为“SetPrecisionMode”,而直接运行脚本则没有问题。这个错误通常与某些底层库(如NumPy、TensorFlow等)设置浮点运算精度模式有关。在调试模式下,由于pdb会中断程序执行,可能导致某些库在尝试设置精度模式时因环境变化而失败。 分析可能的原因: 1. **调试器中断导致上下文变化**:当使用pdb调试时,程序会在断点处暂停,此时如果有一些依赖于连续执行的操作(例如,设置硬件浮点精度模式),可能会因为中断而无法正确完成。 2. **多线程/异步问题**:如果程序中有多线程或异步操作,调试时可能会改变执行顺序,导致某些操作在不恰当的时机被调用。 3. **环境差异**:调试时可能加载了额外的模块(如pdb本身),或者改变了某些环境变量,导致依赖库的行为不同。 根据引用[4]中提到,pdb允许在特定条件满足时才中断,这可能会改变程序的执行流程。另外,引用[2]中提到了在Abaqus脚本中使用pdb设置断点的方法,说明在科学计算环境中使用pdb是常见的,但可能遇到环境问题。 具体到“SetPrecisionMode”错误,这通常与使用GPU计算库(如CUDA)有关,这些库允许设置浮点运算的精度模式(例如,在PyTorch或TensorFlow中)。在直接运行时,这些设置可能一次性完成,而在调试模式下,由于中断,可能导致设置过程被干扰。 解决方案建议: 1. **避免在设置精度的代码附近中断**:如果知道哪一行代码在设置精度,避免在那里设置断点。 2. **使用条件断点**:如引用[4]所述,可以设置条件断点,使得在满足特定条件时才中断,从而避免在设置精度时中断。 3. **尝试其他调试方法**:如使用日志输出,或者使用其他调试器(如PyCharm的调试器)可能避免此问题。 4. **检查库的文档**:查看出现错误的库(例如,如果错误来自TensorFlow或PyTorch)的文档,看是否有关于调试模式下设置精度的注意事项。 由于用户没有提供具体的代码和错误堆栈,我们只能根据常见情况推测。 相关问题: 1. 在调试模式下,为什么会出现与硬件设置相关的错误? 2. 如何避免在调试过程中因中断导致的资源设置错误? 3. 除了pdb,还有哪些Python调试工具可以用于调试科学计算程序? 注意:用户的问题中提到的错误信息是“RuntimeError: SetPrecisionMode”,这通常与GPU计算相关,例如在深度学习框架中设置浮点精度(如FP16、FP32等)。因此,我们假设用户正在使用类似TensorFlow或PyTorch的库。 如果用户能够提供更多信息,如错误堆栈和使用的库,我们可以给出更精确的解答。</think>### 为什么调试时出现 `RuntimeError: SetPrecisionMode` 错误? 在使用 pdb 调试 Python 脚本时出现 `RuntimeError: SetPrecisionMode` 错误而直接运行无异常,通常与**调试器中断导致硬件资源状态冲突**有关。以下是具体原因和解决方案: #### 原因分析 1. **硬件资源锁定冲突** - 某些数值计算库(如 `NumPy`、`TensorFlow`、`PyTorch`)在初始化时会调用底层硬件指令(如 `SetPrecisionMode`)设置浮点运算精度。 - **直接运行**:库能独占硬件资源完成初始化。 - **pdb 调试时**:断点中断了初始化流程,导致硬件资源状态被锁定或未正确释放。当调试器尝试恢复执行时,二次调用精度设置指令会触发冲突错误[^2][^4]。 2. **调试环境差异** - pdb 会注入调试代码,改变 Python 解释器的执行上下文(如堆栈管理、线程调度),可能干扰底层库对硬件资源的访问顺序[^1][^4]。 3. **并发/多线程问题** 若脚本涉及多线程或异步操作,调试中断可能导致: - 子线程未同步挂起,继续占用硬件资源。 - 主线程恢复时与子线程竞争资源,触发错误[^4]。 #### 解决方案 1. **避免在硬件初始化代码处中断** - 定位调用 `SetPrecisionMode` 的代码行(通常出现在数值库导入后的初始化阶段)。 - 使用 **条件断点** 跳过该位置: ```python import pdb; pdb.set_trace(condition="not 'numpy' in locals()") # 初始化完成后才中断[^4] ``` 2. **改用非侵入式调试** - **日志调试**:在关键位置添加日志输出替代断点。 ```python import logging logging.basicConfig(level=logging.DEBUG) ``` - **VSCode/PyCharm 调试器**:图形化调试器可能更妥善处理硬件资源中断[^2]。 3. **显式释放资源** 在断点前手动清理硬件状态(需根据库文档实现): ```python import ctypes ctypes.CDLL('libc.so.6').fesetenv(0) # 重置浮点环境(Linux示例) ``` 4. **环境隔离** 通过 `subprocess` 隔离硬件相关操作: ```python import subprocess subprocess.run(["python", "hardware_module.py"]) # 在独立进程运行敏感代码 ``` #### 根本原因验证 若问题由特定库引起,可通过最小化复现代码定位: ```python # test.py import numpy as np import pdb; pdb.set_trace() # 在此中断会触发错误 a = np.array([1.0], dtype=np.float32) ``` 直接运行正常,但 pdb 调试报错即确认库冲突。 --- ### 相关问题 1. 如何调试涉及硬件资源(如GPU)的 Python 程序? 2. 在 pdb 中如何安全调试多线程/异步代码? 3. `NumPy`/`PyTorch` 初始化时有哪些可能冲突的底层操作? [^1]: 递归深度超限可能与调试时堆栈跟踪增加有关。 [^2]: pdb 断点会暂停脚本执行,改变运行时环境。 [^4]: 条件断点可避免在敏感代码位置中断。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值