Linux内核Thermal框架详解十二、Thermal Governor(2)
本文部分内容参考
Linux Thermal机制源码分析之框架概述_不捡风筝的玖伍贰柒的博客-CSDN博客,
“热散由心静,凉生为室空” - linux温控的那些事儿_内核工匠的博客-CSDN博客
特此致谢!
接前一篇文章Linux内核Thermal框架详解十一、Thermal Governor(1)
二、具体温控策略
上一篇文章对于Linux 内核中的Thermal Governor进行了总体介绍,从本文开始将对于具体的5种策略的每一种进行介绍及详解。
采用由浅入深、由易到难的方法,先介绍最为简单的bang_bang策略。
1. bang_bang
bang_bang是在使用风扇进行散热的设备场景中的算法。bang_bang governor的降温策略跟它的名字一样简单,就下边两条(也可以合并成一条):
- 当throttle发生,打开风扇;
- 当throttle解除,关闭风扇。
首先需要确定throttle即温控是否触发。触发的情况又包括了两种细分情况:(1)当前温度大于温控阈值;(2)当前温度小于温控阈值但是大于滞后温度(温控解除温度),并且是处于降温的过程中。
如下流程所示:
bang_bang governor的代码在drivers/thermal/gov_bang_bang.c中,一共才120行,有效代码不到100行。如下所示:
#include <linux/thermal.h>
#include "thermal_core.h"
/*
 * thermal_zone_trip_update - recompute the on/off target of every thermal
 * instance bound to @trip of @tz.
 * @tz: thermal zone whose instances are examined
 * @trip: index of the trip point being evaluated
 *
 * Only instance->target is changed here. Each affected cooling device is
 * flagged with updated = false; the state is applied later by
 * thermal_cdev_update().
 */
static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
{
int trip_temp, trip_hyst;
struct thermal_instance *instance;
/* read the trip temperature through the zone's callback */
tz->ops->get_trip_temp(tz, trip, &trip_temp);
/* get_trip_hyst is optional: warn once and fall back to zero hysteresis */
if (!tz->ops->get_trip_hyst) {
pr_warn_once("Undefined get_trip_hyst for thermal zone %s - "
"running with default hysteresis zero\n", tz->type);
trip_hyst = 0;
} else
tz->ops->get_trip_hyst(tz, trip, &trip_hyst);
dev_dbg(&tz->device, "Trip%d[temp=%d]:temp=%d:hyst=%d\n",
trip, trip_temp, tz->temperature,
trip_hyst);
/* the zone's instance list is walked with tz->lock held */
mutex_lock(&tz->lock);
list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
/* skip instances that belong to other trip points */
if (instance->trip != trip)
continue;
/* in case fan is in initial state, switch the fan off */
if (instance->target == THERMAL_NO_TARGET)
instance->target = 0;
/* in case fan is neither on nor off set the fan to active */
if (instance->target != 0 && instance->target != 1) {
pr_warn("Thermal instance %s controlled by bang-bang has unexpected state: %ld\n",
instance->name, instance->target);
instance->target = 1;
}
/*
* enable fan when temperature exceeds trip_temp and disable
* the fan in case it falls below trip_temp minus hysteresis
*/
if (instance->target == 0 && tz->temperature >= trip_temp)
instance->target = 1;
else if (instance->target == 1 &&
tz->temperature <= trip_temp - trip_hyst)
instance->target = 0;
dev_dbg(&instance->cdev->device, "target=%d\n",
(int)instance->target);
/* the updated flag is written with the cooling device's own lock held */
mutex_lock(&instance->cdev->lock);
instance->cdev->updated = false; /* cdev needs update */
mutex_unlock(&instance->cdev->lock);
}
mutex_unlock(&tz->lock);
}
/**
* bang_bang_control - controls devices associated with the given zone
* @tz: thermal_zone_device
* @trip: the trip point
*
* Regulation Logic: a two point regulation, deliver cooling state depending
* on the previous state shown in this diagram:
*
* Fan: OFF ON
*
* |
* |
* trip_temp: +---->+
* | | ^
* | | |
* | | Temperature
* (trip_temp - hyst): +<----+
* |
* |
* |
*
* * If the fan is not running and temperature exceeds trip_temp, the fan
* gets turned on.
* * In case the fan is running, temperature must fall below
* (trip_temp - hyst) so that the fan gets turned off again.
*
*/
static int bang_bang_control(struct thermal_zone_device *tz, int trip)
{
struct thermal_instance *instance;
/* step 1: recompute instance->target for this trip and flag the cooling devices */
thermal_zone_trip_update(tz, trip);
/* step 2: with tz->lock held, let every cooling device of the zone apply its pending state */
mutex_lock(&tz->lock);
list_for_each_entry(instance, &tz->thermal_instances, tz_node)
thermal_cdev_update(instance->cdev);
mutex_unlock(&tz->lock);
/* this function always reports success */
return 0;
}
/* bang_bang governor descriptor: throttle requests are routed to bang_bang_control() */
static struct thermal_governor thermal_gov_bang_bang = {
.name = "bang_bang",
.throttle = bang_bang_control,
};
/* emit a table entry pointing at the governor (see THERMAL_GOVERNOR_DECLARE) */
THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang);
麻雀虽小,五脏俱全。别看代码行数比较少,但是背后的机制却并不简单。一段一段来进行分析。
(1)THERMAL_GOVERNOR_DECLARE相关代码
先来看THERMAL_GOVERNOR_DECLARE。它是一个宏定义,在drivers/thermal/thermal_core.h中,代码如下:
/* Init section thermal table */
extern struct thermal_governor *__governor_thermal_table[];
extern struct thermal_governor *__governor_thermal_table_end[];
#define THERMAL_TABLE_ENTRY(table, name) \
static typeof(name) *__thermal_table_entry_##name \
__used __section("__" #table "_thermal_table") = &name
#define THERMAL_GOVERNOR_DECLARE(name) THERMAL_TABLE_ENTRY(governor, name)
实际上这段代码在前文Linux内核Thermal框架详解四、Thermal Core(3)中已经进行了详细分析,这里就不再赘述了。不过为了便于理解和加深印象,将bang_bang governor展开后的代码再次列出:
/* bang_bang governor descriptor: .throttle points at bang_bang_control() */
static struct thermal_governor thermal_gov_bang_bang = {
	.name = "bang_bang",
	.throttle = bang_bang_control,
};

/*
 * Result of expanding THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang);
 * typeof(thermal_gov_bang_bang) is struct thermal_governor, and the
 * terminating ';' is the one written after the macro invocation.
 */
static struct thermal_governor *__thermal_table_entry_thermal_gov_bang_bang
	__used __section("__governor_thermal_table") = &thermal_gov_bang_bang;
Thermal Governor都是通过THERMAL_GOVERNOR_DECLARE定义到了__governor_thermal_table这段空间内。然后在thermal core初始化时通过调用thermal_register_governors来注册到thermal_governor_list链表中。再之后经由“thermal_init->thermal_register_governors->thermal_set_governor”路径和thermal zone device关联上。
(2)handle_non_critical_trips
struct thermal_governor中有一个成员throttle,其是一个函数指针:
int (*throttle)(struct thermal_zone_device *tz, int trip);
对于对象thermal_gov_bang_bang来说,throttle指向了bang_bang_control函数。在解析bang_bang_control函数之前,有一个问题必须弄清楚:这个函数是何时被调用的?
是在drivers/thermal/thermal_core.c的handle_non_critical_trips函数中,代码如下:
/*
 * handle_non_critical_trips - forward a trip to a governor's throttle callback.
 * The zone's own governor is used when one is set, otherwise def_governor.
 * The callback's return value is discarded.
 */
static void handle_non_critical_trips(struct thermal_zone_device *tz, int trip)
{
tz->governor ? tz->governor->throttle(tz, trip) :
def_governor->throttle(tz, trip);
}
那么又是哪里调用的handle_non_critical_trips?是在drivers/thermal/thermal_core.c的handle_thermal_trip函数中,代码如下:
/*
 * handle_thermal_trip - process one trip point of a thermal zone.
 * @tz: thermal zone being evaluated
 * @trip: index of the trip point
 *
 * Sends trip up/down notifications when the temperature crossed the trip
 * since the previous reading, dispatches to the critical or non-critical
 * handler according to the trip type, then calls monitor_thermal_zone().
 */
static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)
{
enum thermal_trip_type type;
int trip_temp, hyst = 0;
/* Ignore disabled trip points */
if (test_bit(trip, &tz->trips_disabled))
return;
tz->ops->get_trip_temp(tz, trip, &trip_temp);
tz->ops->get_trip_type(tz, trip, &type);
/* hyst keeps its initial value 0 when the zone has no get_trip_hyst callback */
if (tz->ops->get_trip_hyst)
tz->ops->get_trip_hyst(tz, trip, &hyst);
/* crossing detection is skipped when there is no valid previous reading */
if (tz->last_temperature != THERMAL_TEMP_INVALID) {
/* rising: previously below trip_temp, now at or above it */
if (tz->last_temperature < trip_temp &&
tz->temperature >= trip_temp)
thermal_notify_tz_trip_up(tz->id, trip,
tz->temperature);
/* falling: previously at or above trip_temp, now below trip_temp - hyst */
if (tz->last_temperature >= trip_temp &&
tz->temperature < (trip_temp - hyst))
thermal_notify_tz_trip_down(tz->id, trip,
tz->temperature);
}
/* CRITICAL and HOT trips take the critical path; all other types go to the governor */
if (type == THERMAL_TRIP_CRITICAL || type == THERMAL_TRIP_HOT)
handle_critical_trips(tz, trip, type);
else
handle_non_critical_trips(tz, trip);
/*
* Alright, we handled this trip successfully.
* So, start monitoring again.
*/
monitor_thermal_zone(tz);
}
对于handle_thermal_trip函数的详细分析有专门的文章章节,由于本篇文章专注于bang_bang governor,故在此不深入展开。
(3)bang_bang_control
再贴一下此函数代码:
/**
* bang_bang_control - controls devices associated with the given zone
* @tz: thermal_zone_device
* @trip: the trip point
*
* Regulation Logic: a two point regulation, deliver cooling state depending
* on the previous state shown in this diagram:
*
* Fan: OFF ON
*
* |
* |
* trip_temp: +---->+
* | | ^
* | | |
* | | Temperature
* (trip_temp - hyst): +<----+
* |
* |
* |
*
* * If the fan is not running and temperature exceeds trip_temp, the fan
* gets turned on.
* * In case the fan is running, temperature must fall below
* (trip_temp - hyst) so that the fan gets turned off again.
*
*/
static int bang_bang_control(struct thermal_zone_device *tz, int trip)
{
struct thermal_instance *instance;
/* step 1: recompute instance->target for this trip and flag the cooling devices */
thermal_zone_trip_update(tz, trip);
/* step 2: with tz->lock held, let every cooling device of the zone apply its pending state */
mutex_lock(&tz->lock);
list_for_each_entry(instance, &tz->thermal_instances, tz_node)
thermal_cdev_update(instance->cdev);
mutex_unlock(&tz->lock);
/* this function always reports success */
return 0;
}
函数注释已经将函数功能说得很清楚了:控制与给定thermal zone相关联的设备。调节逻辑如下:
两点调节,传递基于前一状态的冷却状态(cooling state)。前一状态参见函数代码中用字符画出的图。
若风扇并未运行并且温度超过了trip_temp(触发点温度),则风扇打开;
若风扇已在运行,温度必须降至(trip_temp - hyst)(触发点温度 - 滞回),风扇才能关闭。
(4)thermal_zone_trip_update
bang_bang_control函数中调用了thermal_zone_trip_update函数,这个函数就在bang_bang_control函数的上边,再次贴出其代码:
/*
 * thermal_zone_trip_update - recompute the on/off target of every thermal
 * instance bound to @trip of @tz.
 * @tz: thermal zone whose instances are examined
 * @trip: index of the trip point being evaluated
 *
 * Only instance->target is changed here. Each affected cooling device is
 * flagged with updated = false; the state is applied later by
 * thermal_cdev_update().
 */
static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
{
int trip_temp, trip_hyst;
struct thermal_instance *instance;
/* read the trip temperature through the zone's callback */
tz->ops->get_trip_temp(tz, trip, &trip_temp);
/* get_trip_hyst is optional: warn once and fall back to zero hysteresis */
if (!tz->ops->get_trip_hyst) {
pr_warn_once("Undefined get_trip_hyst for thermal zone %s - "
"running with default hysteresis zero\n", tz->type);
trip_hyst = 0;
} else
tz->ops->get_trip_hyst(tz, trip, &trip_hyst);
dev_dbg(&tz->device, "Trip%d[temp=%d]:temp=%d:hyst=%d\n",
trip, trip_temp, tz->temperature,
trip_hyst);
/* the zone's instance list is walked with tz->lock held */
mutex_lock(&tz->lock);
list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
/* skip instances that belong to other trip points */
if (instance->trip != trip)
continue;
/* in case fan is in initial state, switch the fan off */
if (instance->target == THERMAL_NO_TARGET)
instance->target = 0;
/* in case fan is neither on nor off set the fan to active */
if (instance->target != 0 && instance->target != 1) {
pr_warn("Thermal instance %s controlled by bang-bang has unexpected state: %ld\n",
instance->name, instance->target);
instance->target = 1;
}
/*
* enable fan when temperature exceeds trip_temp and disable
* the fan in case it falls below trip_temp minus hysteresis
*/
if (instance->target == 0 && tz->temperature >= trip_temp)
instance->target = 1;
else if (instance->target == 1 &&
tz->temperature <= trip_temp - trip_hyst)
instance->target = 0;
dev_dbg(&instance->cdev->device, "target=%d\n",
(int)instance->target);
/* the updated flag is written with the cooling device's own lock held */
mutex_lock(&instance->cdev->lock);
instance->cdev->updated = false; /* cdev needs update */
mutex_unlock(&instance->cdev->lock);
}
mutex_unlock(&tz->lock);
}
thermal_zone_trip_update函数做了以下几件事情:
1)获取触发温度
通过tz->ops->get_trip_temp(tz, trip, &trip_temp)获取所属thermal zone的trip temp。
2)获取滞回区间
如果所属thermal zone提供了获取滞回的函数,则调用此函数tz->ops->get_trip_hyst(tz, trip, &trip_hyst);否则滞回区间默认为0。
3)设置初始状态以及异常状态下的值
遍历该thermal zone下的各个instance,将初始状态下(instance->target为THERMAL_NO_TARGET)的值由THERMAL_NO_TARGET设置为0。THERMAL_NO_TARGET为宏定义,在drivers/thermal/thermal_core.h中,如下所示:
/* Initial state of a cooling device during binding */
#define THERMAL_NO_TARGET -1UL
并且将那些风扇状态既不是关闭(instance->target != 0)又不是打开(instance->target != 1)的状态设置为打开(instance->target = 1)。
4)按照函数说明中的策略更新状态
对应代码如下:
/*
* enable fan when temperature exceeds trip_temp and disable
* the fan in case it falls below trip_temp minus hysteresis
*/
if (instance->target == 0 && tz->temperature >= trip_temp)
instance->target = 1;
else if (instance->target == 1 &&
tz->temperature <= trip_temp - trip_hyst)
instance->target = 0;
这段代码就对应了函数说明中的规则:
若风扇并未运行并且温度超过了trip_temp(触发点温度),则风扇打开;若风扇已在运行,温度必须降至(trip_temp - hyst)(触发点温度 - 滞回),风扇才能关闭。
不过此处还只是设置了instance->target的值,实际的操作风扇的动作要由之后的函数来完成。
5)设置instance的cdev状态
mutex_lock(&instance->cdev->lock);
instance->cdev->updated = false; /* cdev needs update */
mutex_unlock(&instance->cdev->lock);
将instance对应的cdev的updated标志设置为false,表示该cdev需要更新,为接下来控制风扇动作的函数做准备。
(5)thermal_cdev_update
thermal_cdev_update函数在drivers/thermal/thermal_helpers.c中,代码如下:
/**
* thermal_cdev_update - update cooling device state if needed
* @cdev: pointer to struct thermal_cooling_device
*
* Update the cooling device state if there is a need.
*/
void thermal_cdev_update(struct thermal_cooling_device *cdev)
{
/* the updated flag is checked and set with cdev->lock held */
mutex_lock(&cdev->lock);
/* only act when the device has been flagged as needing an update */
if (!cdev->updated) {
__thermal_cdev_update(cdev);
cdev->updated = true;
}
mutex_unlock(&cdev->lock);
}
EXPORT_SYMBOL(thermal_cdev_update);
工作实际是交给了__thermal_cdev_update函数来完成。该函数就在它的上边,代码如下:
/*
 * __thermal_cdev_update - apply the highest target requested by any thermal
 * instance of @cdev.
 * @cdev: cooling device to update
 *
 * Instances still at THERMAL_NO_TARGET are ignored; if no instance requests
 * anything, state 0 is applied. thermal_cdev_update() calls this with
 * cdev->lock held.
 */
void __thermal_cdev_update(struct thermal_cooling_device *cdev)
{
struct thermal_instance *instance;
unsigned long target = 0;
/* Make sure cdev enters the deepest cooling state */
list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) {
dev_dbg(&cdev->device, "zone%d->target=%lu\n",
instance->tz->id, instance->target);
/* an instance that has not chosen a target yet does not take part */
if (instance->target == THERMAL_NO_TARGET)
continue;
/* keep the maximum over all instances */
if (instance->target > target)
target = instance->target;
}
thermal_cdev_set_cur_state(cdev, target);
trace_cdev_update(cdev, target);
dev_dbg(&cdev->device, "set to state %lu\n", target);
}
thermal_cdev_set_cur_state函数又在__thermal_cdev_update函数的上边,代码如下:
/*
 * thermal_cdev_set_cur_state - ask the cooling device to switch to @target
 * through its ops->set_cur_state callback.
 *
 * Only when the callback returns 0 is the state-update notification sent
 * and the statistics updated; on a non-zero return nothing else happens.
 * NOTE(review): __thermal_cdev_update() passes an unsigned long, which this
 * prototype converts to int - confirm the value range makes that harmless.
 */
static void thermal_cdev_set_cur_state(struct thermal_cooling_device *cdev,
int target)
{
/* a non-zero return from the callback aborts the update silently */
if (cdev->ops->set_cur_state(cdev, target))
return;
thermal_notify_cdev_state_update(cdev->id, target);
thermal_cooling_device_stats_update(cdev, target);
}
thermal_notify_cdev_state_update函数在drivers/thermal/thermal_netlink.c中,代码如下:
/*
 * thermal_notify_cdev_state_update - send a
 * THERMAL_GENL_EVENT_CDEV_STATE_UPDATE event carrying the cooling device id
 * and its new state.
 *
 * Returns whatever thermal_genl_send_event() returns.
 */
int thermal_notify_cdev_state_update(int cdev_id, int cdev_state)
{
struct param p = { .cdev_id = cdev_id, .cdev_state = cdev_state };
return thermal_genl_send_event(THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, &p);
}
thermal_netlink是不是很眼熟,在“当初”介绍thermal_init函数的时候分析过,此处用上了。正所谓“草蛇灰线,伏脉千里”。
thermal_cooling_device_stats_update函数在drivers/thermal/thermal_sysfs.c中,代码如下:
/*
 * thermal_cooling_device_stats_update - record a transition of @cdev to
 * @new_state in its statistics.
 *
 * Does nothing when the device has no stats object or when the recorded
 * state already equals @new_state.
 */
void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
unsigned long new_state)
{
struct cooling_dev_stats *stats = cdev->stats;
if (!stats)
return;
/* all statistics fields below are modified with stats->lock held */
spin_lock(&stats->lock);
if (stats->state == new_state)
goto unlock;
/* presumably accounts the time spent in the state being left - helper not shown here */
update_time_in_state(stats);
/* trans_table is indexed as old_state * max_states + new_state */
stats->trans_table[stats->state * stats->max_states + new_state]++;
stats->state = new_state;
stats->total_trans++;
unlock:
spin_unlock(&stats->lock);
}
__thermal_cdev_update函数中的trace_cdev_update函数在include/trace/events/thermal.h中,代码如下:
/*
 * cdev_update trace event: records the cooling device's type string and the
 * target state, printed as "type=<type> target=<target>".
 */
TRACE_EVENT(cdev_update,
TP_PROTO(struct thermal_cooling_device *cdev, unsigned long target),
TP_ARGS(cdev, target),
TP_STRUCT__entry(
__string(type, cdev->type)
__field(unsigned long, target)
),
TP_fast_assign(
__assign_str(type, cdev->type);
__entry->target = target;
),
TP_printk("type=%s target=%lu", __get_str(type), __entry->target)
);
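后边这几个函数只列出源码了,不做详细分析。通过后边这几个函数应该能看出:具体控制风扇打开或关闭的动作并不是由Thermal框架本身完成的,而是在thermal_cdev_set_cur_state函数中通过cdev->ops->set_cur_state回调交给cooling device驱动来完成;只有回调成功(返回0)之后,Thermal框架才会通过netlink发出cooling device状态更新事件,并更新统计信息。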
至此,bang_bang governor就全部分析完了。
相关文章
- linux(centos8):用grep命令查找文件内容
- Linux系统与windows系统文件同步
- linux终端中如何将多行shell脚本通过管道执行
- Linux内核中网络数据包的接收-第一部分 概念和框架
- C语言之linux内核实现平方根计算算法
- 【Linux 内核】调度器 ① ( 调度器概念 | 调度器目的 | 调度器主要工作 | 调度器位置 | 进程优先级 | 抢占式调度器 | Linux 进程状态 | Linux 内核进程状态 )
- L71.linux命令每日一练 -- 第十章 Linux网络管理命令 -- wget和mailq
- L61.linux命令每日一练 -- 第九章 Linux进程管理命令 -- renice和nohup
- Linux内核Thermal框架详解十五、Thermal Governor(5)
- Linux内核Thermal框架详解十一、Thermal Governor(1)
- Linux内核Thermal框架详解七、Thermal Core(6)
- 【Linux驱动开发100问】如何编译Linux内核?
- Linux内核Thermal框架详解七、Thermal Core(6)
- Linux内核排错
- 详解Linux内核态调试工具kdump