cpu_freq之切换governor.

本文详细解析了Linux系统中CPUFreq策略(governor)的切换机制,包括如何通过文件系统或内核空间来设置不同的策略,以及核心函数cpufreq_set_policy的具体实现。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

系统中提供五个governor(conservative ondemand userspace powersave performance )策略来选择,但是具体是在哪里选择的呢?
不管从哪里选择都是要调用cpufreq_set_policy来设定governor
 static int cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_policy *new_policy)
下来看看具体都是谁调用cpufreq_set_policy
1):在user space通过scaling_min_freq和scaling_max_freq设定频率上下限的时候,会调用cpufreq_set_policy来更新policy(此时governor不变,只更新min/max)
635 #define store_one(file_name, object)                    \
636 static ssize_t store_##file_name                                        \
637 (struct cpufreq_policy *policy, const char *buf, size_t count)          \
648         temp = new_policy.object;                                       \
649         ret = cpufreq_set_policy(policy, &new_policy);          \
650         if (!ret)                                                       \
651                 policy->user_policy.object = temp;                      \
652                                                                         \
653         return ret ? ret : count;                                       \
654 }
655 
656 store_one(scaling_min_freq, min);
657 store_one(scaling_max_freq, max);


2):user space 直接通过文件系统的scaling_governor 来设定governor.
689 static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
690                                         const char *buf, size_t count)
691 {
706         ret = cpufreq_set_policy(policy, &new_policy);
707         return ret ? ret : count;
708 }
3):调用cpufreq_register_driver的时候会调用cpufreq_init_policy 来设定一个默认的governor.
993 static int cpufreq_init_policy(struct cpufreq_policy *policy)
994 {
1021         /* set default policy */
1022         return cpufreq_set_policy(policy, &new_policy);
1023 }
4):可以在kernel space直接调用cpufreq_update_policy 来更新policy(其中包括governor).
2243 int cpufreq_update_policy(unsigned int cpu)
2244 {
2274 
2275         ret = cpufreq_set_policy(policy, &new_policy);
2276 
2277 unlock:
2278         up_write(&policy->rwsem);
2279 
2280         cpufreq_cpu_put(policy);
2281         return ret;
2282 }
可见都是主动调用cpufreq_set_policy来设置governor,kernel 不会自动切换governor。
下来我们看看cpufreq_set_policy是如何切换governor的。


2164行调用cpufreq_driver->verify验证cpu speed是否在合理范围内.
2169行发送通知链告诉其他相关模块现在要开始调整cpufreq了.
2176行再次调用cpufreq_driver->verify验证cpu speed是否在合理范围内
2181行发送CPUFREQ_NOTIFY通知链
2190行如果driver的setpolicy不为NULL 则调用cpufreq_driver->setpolicy并直接返回
2196行如果新旧governor是一样的,则调用cpufreq_governor(policy, CPUFREQ_GOV_LIMITS)并返回。后面再分析CPUFREQ_GOV_LIMITS是啥意思.
2204行保存旧的governor.
2206行如果旧的governor不为null,则停止然后退出旧的governor.
2213行调用cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)初始化新的governor,然后在2215行调用cpufreq_start_governor(policy)启动这个governor。
如果成功的话,就在2218行退出了
从2225行到2233行,如果新的governor 切换失败,就重新启动旧的governor,并返回错误码.
2145 static int cpufreq_set_policy(struct cpufreq_policy *policy,
2146                                 struct cpufreq_policy *new_policy)
2147 {
2148         struct cpufreq_governor *old_gov;
2149         int ret;
2150 
2151         pr_debug("setting new policy for CPU %u: %u - %u kHz\n",
2152                  new_policy->cpu, new_policy->min, new_policy->max);
2153 
2154         memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo));
2155 
2156         /*
2157         * This check works well when we store new min/max freq attributes,
2158         * because new_policy is a copy of policy with one field updated.
2159         */
2160         if (new_policy->min > new_policy->max)
2161                 return -EINVAL;
2162 
2163         /* verify the cpu speed can be set within this limit */
2164         ret = cpufreq_driver->verify(new_policy);
2165         if (ret)
2166                 return ret;
2167 
2168         /* adjust if necessary - all reasons */
2169         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
2170                         CPUFREQ_ADJUST, new_policy);
2171 
2172         /*
2173          * verify the cpu speed can be set within this limit, which might be
2174          * different to the first one
2175          */
2176         ret = cpufreq_driver->verify(new_policy);
2177         if (ret)
2178                 return ret;
2179 
2180         /* notification of the new policy */
2181         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
2182                         CPUFREQ_NOTIFY, new_policy);
2183 
2184         policy->min = new_policy->min;
2185         policy->max = new_policy->max;
2186 
2187         pr_debug("new min and max freqs are %u - %u kHz\n",
2188                  policy->min, policy->max);
2189 
2190         if (cpufreq_driver->setpolicy) {
2191                 policy->policy = new_policy->policy;
2192                 pr_debug("setting range\n");
2193                 return cpufreq_driver->setpolicy(new_policy);
2194         }
2195 
2196         if (new_policy->governor == policy->governor) {
2197                 pr_debug("cpufreq: governor limits update\n");
2198                 return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
2199         }
2200 
2201         pr_debug("governor switch\n");
2202 
2203         /* save old, working values */
2204         old_gov = policy->governor;
2205         /* end old governor */
2206         if (old_gov) {
2207                 cpufreq_stop_governor(policy);
2208                 cpufreq_exit_governor(policy);
2209         }
2210 
2211         /* start new governor */
2212         policy->governor = new_policy->governor;
2213         ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT);
2214         if (!ret) {
2215                 ret = cpufreq_start_governor(policy);
2216                 if (!ret) {
2217                         pr_debug("cpufreq: governor change\n");
2218                         return 0;
2219                 }
2220                 cpufreq_exit_governor(policy);
2221         }
2222 
2223         /* new governor failed, so re-start old one */
2224         pr_debug("starting governor %s failed\n", policy->governor->name);
2225         if (old_gov) {
2226                 policy->governor = old_gov;
2227                 if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT))
2228                         policy->governor = NULL;
2229                 else
2230                         cpufreq_start_governor(policy);
2231         }
2232 
2233         return ret;
2234 }


下来我们看看cpufreq_governor 这个函数具体做了啥事.
2005行如果系统已经开始suspend,则退出.
2011行如果policy->governor 为NULL,则返回-EINVAL退出.
2014~2026是判断hw的transition latency是否太长,一般走不到。
2034行调用具体governor的governor回调函数来处理传入的event.
2036~2043处理CPUFREQ_GOV_POLICY_INIT和CPUFREQ_GOV_POLICY_EXIT,即模块引用计数和initialized计数的加和减。奇怪的是没有处理CPUFREQ_GOV_LIMITS,所以这个event 肯定是具体的governor来处理的.


2000 static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
2001 {
2002         int ret;
2003 
2004         /* Don't start any governor operations if we are entering suspend */
2005         if (cpufreq_suspended)
2006                 return 0;
2007         /*
2008          * Governor might not be initiated here if ACPI _PPC changed
2009          * notification happened, so check it.
2010          */
2011         if (!policy->governor)
2012                 return -EINVAL;
2013 
2014         if (policy->governor->max_transition_latency &&
2015             policy->cpuinfo.transition_latency >
2016             policy->governor->max_transition_latency) {
2017                 struct cpufreq_governor *gov = cpufreq_fallback_governor();
2018 
2019                 if (gov) {
2020                         pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n",
2021                                 policy->governor->name, gov->name);
2022                         policy->governor = gov;
2023                 } else {
2024                         return -EINVAL;
2025                 }
2026         }
2027 
2028         if (event == CPUFREQ_GOV_POLICY_INIT)
2029                 if (!try_module_get(policy->governor->owner))
2030                         return -EINVAL;
2031 
2032         pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event);
2033 
2034         ret = policy->governor->governor(policy, event);
2035 
2036         if (event == CPUFREQ_GOV_POLICY_INIT) {
2037                 if (ret)
2038                         module_put(policy->governor->owner);
2039                 else
2040                         policy->governor->initialized++;
2041         } else if (event == CPUFREQ_GOV_POLICY_EXIT) {
2042                 policy->governor->initialized--;
2043                 module_put(policy->governor->owner);
2044         }
2045 
2046         return ret;
2047 }
再来看看cpufreq_start_governor。cpufreq_start_governor主要是启动policy当前的governor.


2049 static int cpufreq_start_governor(struct cpufreq_policy *policy)
2050 {
2051         int ret;
2052 
2053         if (cpufreq_driver->get && !cpufreq_driver->setpolicy)
2054                 cpufreq_update_current_freq(policy);
2055 
2056         ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
2057         return ret ? ret : cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
2058 }
2054行调用cpufreq_update_current_freq来更新freq。1562行先调用get函数得到freq,这也是为什么cpufreq_start_governor中判断get函数不能为空的原因.
1555 static unsigned int cpufreq_update_current_freq(struct cpufreq_policy *policy)
1556 {
1557         unsigned int new_freq;
1558 
1559         if (cpufreq_suspended)
1560                 return 0;
1561 
1562         new_freq = cpufreq_driver->get(policy->cpu);
1563         if (!new_freq)
1564                 return 0;
1565 
1566         if (!policy->cur) {
1567                 pr_debug("cpufreq: Driver did not initialize current freq\n");
1568                 policy->cur = new_freq;
1569         } else if (policy->cur != new_freq && has_target()) {
1570                 cpufreq_out_of_sync(policy, new_freq);
1571         }
1572 
1573         return new_freq;
1574 }
2056行又调用cpufreq_governor函数,不过传递的event是CPUFREQ_GOV_START,让具体的governor来处理. 如果成功,2057行再传递CPUFREQ_GOV_LIMITS这个event,让governor按照当前的min/max调整频率.
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值