cpufreq子系统中,核心层会为driver和governor提供一系列的接口
一、cpufreq_register_driver
其中由cpufreq core完成的是注册和注销函数
/*********************************************************************
* REGISTER / UNREGISTER CPUFREQ DRIVER *
*********************************************************************/
/**
* cpufreq_register_driver - register a CPU Frequency driver
* @driver_data: A struct cpufreq_driver containing the values
* submitted by the CPU Frequency driver.
*
* Registers a CPU Frequency driver to this core code.
*
* Returns zero on success, -ENODEV when cpufreq is disabled or when no
* policy ended up on cpufreq_policy_list for a driver that is not
* CPUFREQ_STICKY, -EINVAL when the set of callbacks in @driver_data is
* inconsistent, and -EEXIST when another driver is already registered
* (and isn't unregistered in the meantime). Errors returned by
* create_boost_sysfs_file() and subsys_interface_register() are passed
* through unchanged.
*/
// Only one driver can be registered at a time: the global cpufreq_driver
// pointer is checked below and a second registration is refused.
// NOTE(review): the driver is presumably the platform-specific part that
// programs the clock hardware to the frequency a governor selects; that is
// not visible in this function - confirm against the driver documentation.
int cpufreq_register_driver(struct cpufreq_driver *driver_data)
{
unsigned long flags;
int ret;
// Nothing can be registered while cpufreq_disabled() reports true.
if (cpufreq_disabled())
return -ENODEV;
// Reject a driver that lacks ->verify or ->init, that provides none of
// ->setpolicy / ->target_index / ->target, that combines ->setpolicy with
// one of the ->target* callbacks, or that implements only one of
// ->get_intermediate / ->target_intermediate.
if (!driver_data || !driver_data->verify || !driver_data->init ||
!(driver_data->setpolicy || driver_data->target_index || driver_data->target) ||
(driver_data->setpolicy && (driver_data->target_index || driver_data->target)) ||
(!!driver_data->get_intermediate != !!driver_data->target_intermediate))// both intermediate callbacks or neither
return -EINVAL;
pr_debug("trying to register driver %s\n", driver_data->name);
/* Protect against concurrent CPU online/offline. */
get_online_cpus();
// cpufreq_driver_lock is taken for writing with the interrupt state saved;
// the critical section is only a pointer test and an assignment.
write_lock_irqsave(&cpufreq_driver_lock, flags);
// A non-NULL cpufreq_driver means some driver is already registered.
if (cpufreq_driver) {
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
ret = -EEXIST;
// Leave through out: so the hotplug reference is dropped.
goto out;
}
// No driver yet: publish driver_data as the registered driver.
cpufreq_driver = driver_data;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
// A driver that provides ->setpolicy is always flagged CPUFREQ_CONST_LOOPS.
if (driver_data->setpolicy)
// NOTE(review): CPUFREQ_CONST_LOOPS presumably tells the core that
// loops_per_jiffy does not need rescaling on frequency changes - confirm.
driver_data->flags |= CPUFREQ_CONST_LOOPS;
// Create the boost sysfs file.
// NOTE(review): presumably /sys/devices/system/cpu/cpufreq/boost - confirm.
ret = create_boost_sysfs_file();
if (ret)
goto err_null_driver;
// Register cpufreq_interface with the subsystem core.
// NOTE(review): this presumably results in cpufreq_add_dev() being called
// for every CPU device, which is where policies get created - confirm
// against the definition of cpufreq_interface.
ret = subsys_interface_register(&cpufreq_interface);
if (ret)
goto err_boost_unreg;
// Unless the driver is CPUFREQ_STICKY, an empty policy list at this point
// is treated as a failed registration and everything is rolled back.
if (!(cpufreq_driver->flags & CPUFREQ_STICKY) && list_empty(&cpufreq_policy_list)) {
/* if all ->init() calls failed, unregister */
ret = -ENODEV;
pr_debug("%s: No CPU initialized for driver %s\n", __func__, driver_data->name);
goto err_if_unreg;
}
// Register the CPU hotplug notifier (cpufreq_cpu_notifier) so later
// online/offline events reach the cpufreq core.
register_hotcpu_notifier(&cpufreq_cpu_notifier);
pr_debug("driver %s up and running\n", driver_data->name);
out:
put_online_cpus();
return ret;
// Error unwinding: each label undoes one successful step, in reverse order.
err_if_unreg:
subsys_interface_unregister(&cpufreq_interface);
err_boost_unreg:
remove_boost_sysfs_file();
err_null_driver:
// The global pointer is cleared under the same write lock used to set it.
write_lock_irqsave(&cpufreq_driver_lock, flags);
cpufreq_driver = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
goto out;// jump back to out: to release the reference taken by get_online_cpus()
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);
1.1subsys_interface_register
subsys_interface_register会遍历每个cpu,为每个cpu添加设备,具体详见:
https://blog.youkuaiyun.com/feifei_csdn/article/details/80915742
/**
 * cpufreq_add_dev - the cpufreq interface for a CPU device.
 * @dev: CPU device.
 * @sif: Subsystem interface structure pointer (not used)
 *
 * For an online CPU the work is delegated to cpufreq_online(). For an
 * offline CPU only the sysfs link is created, and only when a policy already
 * exists for the CPU and the CPU was not yet recorded in policy->real_cpus.
 *
 * Returns the result of cpufreq_online() or add_cpu_dev_symlink(), or 0 when
 * there is nothing to do for an offline CPU.
 */
static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
	struct cpufreq_policy *policy;
	unsigned cpu = dev->id;

	dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu);

	/* Online CPUs go through the full online path straight away. */
	if (cpu_online(cpu))
		return cpufreq_online(cpu);

	/*
	 * A hotplug notifier will follow and we will handle it as CPU
	 * online then.  For now, just create the sysfs link, unless
	 * there is no policy or the link is already present.
	 */
	policy = per_cpu(cpufreq_cpu_data, cpu);
	if (!policy)
		return 0;

	/* The bit was already set: the CPU has been handled before. */
	if (cpumask_test_and_set_cpu(cpu, policy->real_cpus))
		return 0;

	return add_cpu_dev_symlink(policy, cpu);
}
对于online的cpu,会尝试为其创建新的policy,或者把该cpu加入已有的policy
/*
 * cpufreq_online - bring a CPU under cpufreq management.
 * @cpu: number of the CPU being brought online.
 *
 * If the CPU already has a policy that still has active CPUs, the CPU is
 * simply added to that policy. Otherwise an existing inactive policy is
 * reused, or a new one is allocated, and it is then initialised through the
 * driver's ->init() callback, announced to the policy notifiers and handed
 * to cpufreq_init_policy().
 *
 * Return: 0 on success; -ENOMEM if no policy could be allocated; -EIO if the
 * driver's ->get() callback reports a current frequency of 0; otherwise the
 * error code of the step that failed.
 */
static int cpufreq_online(unsigned int cpu)
{
	struct cpufreq_policy *policy;
	bool new_policy;
	unsigned long flags;
	unsigned int j;
	int ret;

	pr_debug("%s: bringing CPU%u online\n", __func__, cpu);

	/*
	 * Check if this CPU already has a policy to manage it.
	 * cpufreq_cpu_data is a per-CPU variable holding the policy pointer
	 * of each CPU.
	 */
	policy = per_cpu(cpufreq_cpu_data, cpu);
	if (policy) {
		WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus));

		/* The policy is still active: just add this CPU to it. */
		if (!policy_is_inactive(policy))
			return cpufreq_add_policy_cpu(policy, cpu);

		/* This is the only online CPU for the policy.  Start over. */
		new_policy = false;
		down_write(&policy->rwsem);
		/* This CPU becomes the one recorded in policy->cpu. */
		policy->cpu = cpu;
		policy->governor = NULL;
		up_write(&policy->rwsem);
	} else {
		/* No policy recorded for this CPU yet: allocate a fresh one. */
		new_policy = true;
		policy = cpufreq_policy_alloc(cpu);
		if (!policy)
			return -ENOMEM;
	}

	/* Start with only this CPU in policy->cpus. */
	cpumask_copy(policy->cpus, cpumask_of(cpu));

	/*
	 * Call driver.  From then on the cpufreq must be able
	 * to accept all calls to ->verify and ->setpolicy for this CPU.
	 */
	ret = cpufreq_driver->init(policy);
	if (ret) {
		pr_debug("initialization failed\n");
		goto out_free_policy;
	}

	/* Held until the policy is fully set up or an error path releases it. */
	down_write(&policy->rwsem);

	if (new_policy) {
		/* related_cpus should at least include policy->cpus. */
		cpumask_copy(policy->related_cpus, policy->cpus);
		/* Remember CPUs present at the policy creation time. */
		cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask);

		/* Name and add the kobject: "policy<first related CPU>". */
		ret = kobject_add(&policy->kobj, cpufreq_global_kobject,
				  "policy%u",
				  cpumask_first(policy->related_cpus));
		if (ret) {
			pr_err("%s: failed to add policy->kobj: %d\n", __func__, ret);
			goto out_exit_policy;
		}
	}

	/*
	 * Affected cpus must always be the ones which are online.  We aren't
	 * managing offline cpus here, so mask them out of policy->cpus.
	 */
	cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);

	if (new_policy) {
		policy->user_policy.min = policy->min;
		policy->user_policy.max = policy->max;

		/* Point the per-CPU slot of every related CPU at this policy. */
		write_lock_irqsave(&cpufreq_driver_lock, flags);
		for_each_cpu(j, policy->related_cpus)
			per_cpu(cpufreq_cpu_data, j) = policy;
		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
	} else {
		/* Reused policy: restore the limits saved in user_policy. */
		policy->min = policy->user_policy.min;
		policy->max = policy->user_policy.max;
	}

	/* Only for drivers that provide ->get and do not use ->setpolicy. */
	if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
		/* policy->cur must be known before the table check below. */
		policy->cur = cpufreq_driver->get(policy->cpu);
		if (!policy->cur) {
			/*
			 * ret still holds 0 from the earlier successful calls;
			 * without an explicit error code the caller would be
			 * told that a policy which is about to be torn down
			 * was set up successfully.
			 */
			ret = -EIO;
			pr_err("%s: ->get() failed\n", __func__);
			goto out_exit_policy;
		}
	}

	/*
	 * Sometimes boot loaders set CPU frequency to a value outside of
	 * frequency table present with cpufreq core. In such cases CPU might be
	 * unstable if it has to run on that frequency for long duration of time
	 * and so its better to set it to a frequency which is specified in
	 * freq-table. This also makes cpufreq stats inconsistent as
	 * cpufreq-stats would fail to register because current frequency of CPU
	 * isn't found in freq-table.
	 *
	 * Because we don't want this change to effect boot process badly, we go
	 * for the next freq which is >= policy->cur ('cur' must be set by now,
	 * otherwise we will end up setting freq to lowest of the table as 'cur'
	 * is initialized to zero).
	 *
	 * We are passing target-freq as "policy->cur - 1" otherwise
	 * __cpufreq_driver_target() would simply fail, as policy->cur will be
	 * equal to target-freq.
	 */
	/* NOTE(review): has_target() presumably means the driver uses ->target/->target_index - confirm. */
	if ((cpufreq_driver->flags & CPUFREQ_NEED_INITIAL_FREQ_CHECK) && has_target()) {
		/* Are we running at unknown frequency ? */
		ret = cpufreq_frequency_table_get_index(policy, policy->cur);
		if (ret == -EINVAL) {
			/* Warn user and fix it */
			pr_warn("%s: CPU%d: Running at unlisted freq: %u KHz\n", __func__, policy->cpu, policy->cur);
			ret = __cpufreq_driver_target(policy, policy->cur - 1, CPUFREQ_RELATION_L);

			/*
			 * Reaching here after boot in a few seconds may not
			 * mean that system will remain stable at "unknown"
			 * frequency for longer duration. Hence, a BUG_ON().
			 */
			BUG_ON(ret);
			pr_warn("%s: CPU%d: Unlisted initial frequency changed to: %u KHz\n", __func__, policy->cpu, policy->cur);
		}
	}

	/* Send CPUFREQ_START to the policy notifier chain. */
	blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_START, policy);

	if (new_policy) {
		ret = cpufreq_add_dev_interface(policy);
		if (ret)
			goto out_exit_policy;

		blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy);

		/* Make the new policy visible on the global policy list. */
		write_lock_irqsave(&cpufreq_driver_lock, flags);
		list_add(&policy->policy_list, &cpufreq_policy_list);
		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
	}

	/* Initialise the policy. */
	ret = cpufreq_init_policy(policy);
	if (ret) {
		pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n",
		       __func__, cpu, ret);
		/* cpufreq_policy_free() will notify based on this */
		new_policy = false;
		goto out_exit_policy;
	}

	up_write(&policy->rwsem);

	/* Emit a KOBJ_ADD uevent for the policy kobject. */
	kobject_uevent(&policy->kobj, KOBJ_ADD);

	/* Callback for handling stuff after policy is ready */
	if (cpufreq_driver->ready)
		cpufreq_driver->ready(policy);

	pr_debug("initialization complete\n");

	return 0;

out_exit_policy:
	up_write(&policy->rwsem);

	/* ->exit() is the counterpart of the ->init() call above. */
	if (cpufreq_driver->exit)
		cpufreq_driver->exit(policy);
out_free_policy:
	cpufreq_policy_free(policy, !new_policy);
	return ret;
}
二、cpufreq_unregister_driver
注销函数和注册函数对应
/**
 * cpufreq_unregister_driver - unregister the current CPUFreq driver
 * @driver: the driver to unregister; must be the one currently registered.
 *
 * Unregister the current CPUFreq driver. Only call this if you have
 * the right to do so, i.e. if you have succeeded in initialising before!
 * Returns zero if successful, and -EINVAL if no driver is registered or
 * @driver is not the registered one.
 */
int cpufreq_unregister_driver(struct cpufreq_driver *driver)
{
	unsigned long irq_flags;

	/* Nothing registered at all. */
	if (!cpufreq_driver)
		return -EINVAL;

	/* Somebody else's driver is registered. */
	if (driver != cpufreq_driver)
		return -EINVAL;

	pr_debug("unregistering driver %s\n", driver->name);

	/* Protect against concurrent cpu hotplug */
	get_online_cpus();

	/* Undo what registration set up. */
	subsys_interface_unregister(&cpufreq_interface);
	remove_boost_sysfs_file();
	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);

	/* Clear the global driver pointer under the write lock. */
	write_lock_irqsave(&cpufreq_driver_lock, irq_flags);
	cpufreq_driver = NULL;
	write_unlock_irqrestore(&cpufreq_driver_lock, irq_flags);

	put_online_cpus();

	return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
2.1subsys_interface_unregister
/**
* cpufreq_remove_dev - remove a CPU device
*
* Removes the cpufreq interface for a CPU device.
*/
static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
unsigned int cpu = dev->id;
struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
if (!policy)
return;
// 将cpu的cpufreq remove
if (cpu_online(cpu)) {
// 如果要移除整个cpu device,那应该和add device对应
cpufreq_offline_prepare(cpu);
cpufreq_offline_finish(cpu);
}
cpumask_clear_cpu(cpu, policy->real_cpus);
// 尤其是移除文件链接这里,是在哪里创建文件或者链接的呢?
remove_cpu_dev_symlink(policy, cpu);
// policy中不再关联任何cpu,real_cpu包含online+offline+present
if (cpumask_empty(policy->real_cpus))
cpufreq_policy_free(policy, true);
}
三、热插拔的通知事件
// 在具体的驱动中也有对应的热插拔事件通知
/*
 * cpufreq_cpu_callback - CPU hotplug notifier callback of the cpufreq core.
 * @nfb: notifier block this callback was registered with (not used).
 * @action: hotplug event; the CPU_TASKS_FROZEN bit is masked off before
 *	    the event is dispatched.
 * @hcpu: the CPU number, passed as a pointer-sized value.
 *
 * Always returns NOTIFY_OK; results of the online/offline helpers are ignored.
 */
static int cpufreq_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	unsigned long event = action & ~CPU_TASKS_FROZEN;

	switch (event) {
	case CPU_ONLINE:
	case CPU_DOWN_FAILED:
		/* CPU came up, or an attempted offline was aborted. */
		cpufreq_online(cpu);
		break;

	case CPU_DOWN_PREPARE:
		cpufreq_offline_prepare(cpu);
		break;

	case CPU_POST_DEAD:
		cpufreq_offline_finish(cpu);
		break;
	}

	return NOTIFY_OK;
}
/* Notifier block whose callback is cpufreq_cpu_callback(). */
static struct notifier_block __refdata cpufreq_cpu_notifier = {
.notifier_call = cpufreq_cpu_callback,
};
四、cpufreq_register_governor
为governor提供注册函数,将governor添加到链表中
/*
 * cpufreq_register_governor - add a governor to the list of known governors.
 * @governor: governor to register.
 *
 * Returns 0 on success, -EINVAL if @governor is NULL, -ENODEV if cpufreq is
 * disabled, and -EBUSY if find_governor() already finds a governor with the
 * same name.
 */
int cpufreq_register_governor(struct cpufreq_governor *governor)
{
	int err = 0;

	if (!governor)
		return -EINVAL;

	if (cpufreq_disabled())
		return -ENODEV;

	mutex_lock(&cpufreq_governor_mutex);

	governor->initialized = 0;

	/* Governor names must be unique on cpufreq_governor_list. */
	if (find_governor(governor->name))
		err = -EBUSY;
	else
		list_add(&governor->governor_list, &cpufreq_governor_list);

	mutex_unlock(&cpufreq_governor_mutex);

	return err;
}
EXPORT_SYMBOL_GPL(cpufreq_register_governor);
五、cpufreq_unregister_governor
注销governor时,需要将policy和governor解除关联,那它们是何时关联起来的呢?这个疑问留到后面再看
/*
 * cpufreq_unregister_governor - remove a governor from the governor list.
 * @governor: governor to unregister.
 *
 * Does nothing when @governor is NULL or cpufreq is disabled. Before the
 * governor is unlinked, every inactive policy whose last_governor names it
 * has its governor pointer and last_governor string cleared.
 */
void cpufreq_unregister_governor(struct cpufreq_governor *governor)
{
	struct cpufreq_policy *pol;
	unsigned long irq_flags;

	if (!governor || cpufreq_disabled())
		return;

	/* clear last_governor for all inactive policies */
	read_lock_irqsave(&cpufreq_driver_lock, irq_flags);
	for_each_inactive_policy(pol) {
		if (strcmp(pol->last_governor, governor->name) == 0) {
			pol->governor = NULL;
			strcpy(pol->last_governor, "\0");
		}
	}
	read_unlock_irqrestore(&cpufreq_driver_lock, irq_flags);

	/* Unlink the governor from the global list. */
	mutex_lock(&cpufreq_governor_mutex);
	list_del(&governor->governor_list);
	mutex_unlock(&cpufreq_governor_mutex);
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
六、get_cpu_idle_time
interactive governor需要统计cpu处于active状态的时间;其中cpu自启动以来的idle总时间,由核心层提供的get_cpu_idle_time接口给出
/*
 * get_cpu_idle_time - idle time accumulated by a CPU.
 * @cpu: CPU to query.
 * @wall: optional output for the current wall time; which helper fills it
 *	  in depends on @io_busy (see below).
 * @io_busy: when zero, the value of get_cpu_iowait_time_us() is added to
 *	     the idle time.
 *
 * Falls back to get_cpu_idle_time_jiffy() when get_cpu_idle_time_us()
 * returns -1.
 * NOTE(review): -1 presumably means that fine-grained idle accounting is not
 * available for this CPU - confirm against get_cpu_idle_time_us().
 */
u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy)
{
	/* @wall is only handed to the idle query when iowait counts as busy. */
	u64 *wall_out = io_busy ? wall : NULL;
	u64 idle_us = get_cpu_idle_time_us(cpu, wall_out);

	if (idle_us == -1ULL)
		return get_cpu_idle_time_jiffy(cpu, wall);

	if (io_busy)
		return idle_us;

	/* iowait is counted as idle: add it, letting this call fill @wall. */
	return idle_us + get_cpu_iowait_time_us(cpu, wall);
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time);
/*
 * get_cpu_idle_time_jiffy - jiffies-based idle time of a CPU.
 * @cpu: CPU to query.
 * @wall: if non-NULL, receives the current wall time converted with
 *	  cputime_to_usecs().
 *
 * Idle time is computed as the current wall time minus the sum of the
 * user, system, irq, softirq, steal and nice counters of @cpu.
 * Returns that difference converted with cputime_to_usecs().
 */
static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
{
	u64 now = jiffies64_to_cputime64(get_jiffies_64());
	u64 busy;

	/* Everything that is accounted as non-idle. */
	busy = kcpustat_cpu(cpu).cpustat[CPUTIME_USER] +
	       kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM] +
	       kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ] +
	       kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ] +
	       kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL] +
	       kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];

	if (wall)
		*wall = cputime_to_usecs(now);

	/* Whatever part of the wall time was not busy is idle. */
	return cputime_to_usecs(now - busy);
}