zl程序教程

您现在的位置是:首页 >  系统

当前栏目

Linux内核Thermal框架详解十二、Thermal Governor(2)

Linux内核框架 详解 十二
2023-09-14 09:15:40 时间

本文部分内容参考

万字长文 | Thermal框架源码剖析

Linux Thermal机制源码分析之框架概述_不捡风筝的玖伍贰柒的博客-CSDN博客

“热散由心静,凉生为室空” - linux温控的那些事儿_内核工匠的博客-CSDN博客

特此致谢!

接前一篇文章Linux内核Thermal框架详解十一、Thermal Governor(1)

二、具体温控策略

上一篇文章对于Linux 内核中的Thermal Governor进行了总体介绍,从本文开始将对于具体的5种策略的每一种进行介绍及详解。

采用由浅入深、由易到难的方法,先介绍最为简单的bang_bang策略。

1. bang_bang

bang_bang是在使用风扇进行散热的设备场景中的算法。bang_bang governor的降温策略跟它的名字一样简单,就下边两条(也可以合并成一条):

  • 当throttle发生,打开风扇;
  • 当throttle解除,关闭风扇。

首先需要确定throttle即温控是否触发。触发的情况又包括了两种细分情况:(1)当前温度大于温控阈值;(2)当前温度小于温控阈值但是大于滞后温度(温控解除温度),并且是处于降温的过程中。

如下流程所示:

bang_bang governor的代码在drivers/thermal/gov_bang_bang.c中,一共才120行,有效代码不到100行。如下所示:

#include <linux/thermal.h>

#include "thermal_core.h"

static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
{
	int trip_temp, trip_hyst;
	struct thermal_instance *instance;

	tz->ops->get_trip_temp(tz, trip, &trip_temp);

	if (!tz->ops->get_trip_hyst) {
		pr_warn_once("Undefined get_trip_hyst for thermal zone %s - "
				"running with default hysteresis zero\n", tz->type);
		trip_hyst = 0;
	} else
		tz->ops->get_trip_hyst(tz, trip, &trip_hyst);

	dev_dbg(&tz->device, "Trip%d[temp=%d]:temp=%d:hyst=%d\n",
				trip, trip_temp, tz->temperature,
				trip_hyst);

	mutex_lock(&tz->lock);

	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		if (instance->trip != trip)
			continue;

		/* in case fan is in initial state, switch the fan off */
		if (instance->target == THERMAL_NO_TARGET)
			instance->target = 0;

		/* in case fan is neither on nor off set the fan to active */
		if (instance->target != 0 && instance->target != 1) {
			pr_warn("Thermal instance %s controlled by bang-bang has unexpected state: %ld\n",
					instance->name, instance->target);
			instance->target = 1;
		}

		/*
		 * enable fan when temperature exceeds trip_temp and disable
		 * the fan in case it falls below trip_temp minus hysteresis
		 */
		if (instance->target == 0 && tz->temperature >= trip_temp)
			instance->target = 1;
		else if (instance->target == 1 &&
				tz->temperature <= trip_temp - trip_hyst)
			instance->target = 0;

		dev_dbg(&instance->cdev->device, "target=%d\n",
					(int)instance->target);

		mutex_lock(&instance->cdev->lock);
		instance->cdev->updated = false; /* cdev needs update */
		mutex_unlock(&instance->cdev->lock);
	}

	mutex_unlock(&tz->lock);
}

/**
 * bang_bang_control - controls devices associated with the given zone
 * @tz: thermal_zone_device
 * @trip: the trip point
 *
 * Regulation Logic: a two point regulation, deliver cooling state depending
 * on the previous state shown in this diagram:
 *
 *                Fan:   OFF    ON
 *
 *                              |
 *                              |
 *          trip_temp:    +---->+
 *                        |     |        ^
 *                        |     |        |
 *                        |     |   Temperature
 * (trip_temp - hyst):    +<----+
 *                        |
 *                        |
 *                        |
 *
 *   * If the fan is not running and temperature exceeds trip_temp, the fan
 *     gets turned on.
 *   * In case the fan is running, temperature must fall below
 *     (trip_temp - hyst) so that the fan gets turned off again.
 *
 */
static int bang_bang_control(struct thermal_zone_device *tz, int trip)
{
	struct thermal_instance *instance;

	thermal_zone_trip_update(tz, trip);

	mutex_lock(&tz->lock);

	list_for_each_entry(instance, &tz->thermal_instances, tz_node)
		thermal_cdev_update(instance->cdev);

	mutex_unlock(&tz->lock);

	return 0;
}

/*
 * Governor descriptor for the bang-bang policy: named "bang_bang", with
 * bang_bang_control() installed as its throttle callback.
 */
static struct thermal_governor thermal_gov_bang_bang = {
	.name		= "bang_bang",
	.throttle	= bang_bang_control,
};
/* Emit a table entry pointing at the descriptor above */
THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang);

麻雀虽小,五脏俱全。别看代码行数比较少,但是背后的机制却并不简单。一段一段来进行分析。

(1)THERMAL_GOVERNOR_DECLARE相关代码

先来看THERMAL_GOVERNOR_DECLARE。它是一个宏定义,在drivers/thermal/thermal_core.h中,代码如下:

/* Init section thermal table */
extern struct thermal_governor *__governor_thermal_table[];
extern struct thermal_governor *__governor_thermal_table_end[];

#define THERMAL_TABLE_ENTRY(table, name)			\
	static typeof(name) *__thermal_table_entry_##name	\
	__used __section("__" #table "_thermal_table") = &name

#define THERMAL_GOVERNOR_DECLARE(name)	THERMAL_TABLE_ENTRY(governor, name)

实际上这段代码在前文Linux内核Thermal框架详解四、Thermal Core(3)中已经进行了详细分析,这里就不再赘述了。不过为了便于理解和加深印象,将bang_bang governor展开后的代码再次列出:

/* Governor descriptor, as written in gov_bang_bang.c */
static struct thermal_governor thermal_gov_bang_bang = {
	.name		= "bang_bang",
	.throttle	= bang_bang_control,
};

/*
 * What THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang); expands to:
 * typeof(thermal_gov_bang_bang) is struct thermal_governor, the table name
 * "governor" is pasted into the section name, and the terminating
 * semicolon is the one written after the macro invocation.
 */
static struct thermal_governor *__thermal_table_entry_thermal_gov_bang_bang
	__used __section("__governor_thermal_table") = &thermal_gov_bang_bang;

Thermal Governor都是通过THERMAL_GOVERNOR_DECLARE定义到了__governor_thermal_table这段空间内。然后在thermal core初始化时通过调用thermal_register_governors来注册到thermal_governor_list链表中。再之后经由“thermal_init->thermal_register_governors->thermal_set_governor”路径和thermal zone device关联上。

(2)handle_non_critical_trips

struct thermal_governor中有一个成员throttle,其是一个函数指针:

int (*throttle)(struct thermal_zone_device *tz, int trip);

对于对象thermal_gov_bang_bang来说,指向了bang_bang_control函数。在解析bang_bang_control函数之前,有一个问题必须弄清楚:这个函数是何时被调用的?

是在drivers/thermal/thermal_core.c的handle_non_critical_trips函数中,代码如下:

/*
 * handle_non_critical_trips - hand a trip point over to a governor
 * @tz: thermal zone the trip belongs to
 * @trip: index of the trip point
 *
 * Calls the throttle callback of the zone's own governor when one is set,
 * and that of def_governor otherwise.  The callback's return value is
 * discarded.
 */
static void handle_non_critical_trips(struct thermal_zone_device *tz, int trip)
{
	if (tz->governor)
		tz->governor->throttle(tz, trip);
	else
		def_governor->throttle(tz, trip);
}

那么又是哪里调用的handle_non_critical_trips?是在drivers/thermal/thermal_core.c的handle_thermal_trip函数中,代码如下:

/*
 * handle_thermal_trip - process one trip point of a thermal zone
 * @tz: thermal zone being handled
 * @trip: index of the trip point
 *
 * Does nothing for a disabled trip point.  Otherwise it reads the trip's
 * temperature, type and (if available) hysteresis, emits a trip-up or
 * trip-down notification when the temperature crossed the trip since the
 * last valid reading, dispatches to the critical or non-critical handler
 * depending on the trip type, and finally re-arms zone monitoring.
 */
static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)
{
	int trip_temp, hyst = 0;
	enum thermal_trip_type type;

	/* Ignore disabled trip points */
	if (test_bit(trip, &tz->trips_disabled))
		return;

	tz->ops->get_trip_temp(tz, trip, &trip_temp);
	tz->ops->get_trip_type(tz, trip, &type);
	/* hyst keeps its initial 0 when the zone has no get_trip_hyst */
	if (tz->ops->get_trip_hyst)
		tz->ops->get_trip_hyst(tz, trip, &hyst);

	if (tz->last_temperature != THERMAL_TEMP_INVALID) {
		if (tz->last_temperature < trip_temp) {
			/* Previously below the trip: report an upward crossing */
			if (tz->temperature >= trip_temp)
				thermal_notify_tz_trip_up(tz->id, trip,
							  tz->temperature);
		} else if (tz->temperature < (trip_temp - hyst)) {
			/* Previously at/above the trip, now under trip - hyst */
			thermal_notify_tz_trip_down(tz->id, trip,
						    tz->temperature);
		}
	}

	switch (type) {
	case THERMAL_TRIP_CRITICAL:
	case THERMAL_TRIP_HOT:
		handle_critical_trips(tz, trip, type);
		break;
	default:
		handle_non_critical_trips(tz, trip);
		break;
	}

	/*
	 * Alright, we handled this trip successfully.
	 * So, start monitoring again.
	 */
	monitor_thermal_zone(tz);
}

对于handle_thermal_trip函数的详细分析有专门的文章章节,由于本篇文章专注于bang_bang governor,故在此不深入展开。

(3)bang_bang_control

再贴一下此函数代码:

/**
 * bang_bang_control - controls devices associated with the given zone
 * @tz: thermal_zone_device
 * @trip: the trip point
 *
 * Regulation Logic: a two point regulation, deliver cooling state depending
 * on the previous state shown in this diagram:
 *
 *                Fan:   OFF    ON
 *
 *                              |
 *                              |
 *          trip_temp:    +---->+
 *                        |     |        ^
 *                        |     |        |
 *                        |     |   Temperature
 * (trip_temp - hyst):    +<----+
 *                        |
 *                        |
 *                        |
 *
 *   * If the fan is not running and temperature exceeds trip_temp, the fan
 *     gets turned on.
 *   * In case the fan is running, temperature must fall below
 *     (trip_temp - hyst) so that the fan gets turned off again.
 *
 */
static int bang_bang_control(struct thermal_zone_device *tz, int trip)
{
	struct thermal_instance *instance;

	thermal_zone_trip_update(tz, trip);

	mutex_lock(&tz->lock);

	list_for_each_entry(instance, &tz->thermal_instances, tz_node)
		thermal_cdev_update(instance->cdev);

	mutex_unlock(&tz->lock);

	return 0;
}

函数注释已经将函数功能说得很清楚了:控制与给定thermal zone相关联的设备。调节逻辑如下:

两点调节,传递基于前一状态的冷却状态(cooling state)。前一状态参见函数代码中用字符画出的图。

若风扇并未运行并且温度达到或超过了trip_temp(触发点温度),则风扇打开;

若风扇已在运行,温度必须降至(trip_temp - hyst)(触发点温度 - 滞回),风扇才能关闭。

(4)thermal_zone_trip_update

bang_bang_control函数中调用了thermal_zone_trip_update函数,这个函数就在bang_bang_control函数的上边,再次贴出其代码:

static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
{
	int trip_temp, trip_hyst;
	struct thermal_instance *instance;

	tz->ops->get_trip_temp(tz, trip, &trip_temp);

	if (!tz->ops->get_trip_hyst) {
		pr_warn_once("Undefined get_trip_hyst for thermal zone %s - "
				"running with default hysteresis zero\n", tz->type);
		trip_hyst = 0;
	} else
		tz->ops->get_trip_hyst(tz, trip, &trip_hyst);

	dev_dbg(&tz->device, "Trip%d[temp=%d]:temp=%d:hyst=%d\n",
				trip, trip_temp, tz->temperature,
				trip_hyst);

	mutex_lock(&tz->lock);

	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		if (instance->trip != trip)
			continue;

		/* in case fan is in initial state, switch the fan off */
		if (instance->target == THERMAL_NO_TARGET)
			instance->target = 0;

		/* in case fan is neither on nor off set the fan to active */
		if (instance->target != 0 && instance->target != 1) {
			pr_warn("Thermal instance %s controlled by bang-bang has unexpected state: %ld\n",
					instance->name, instance->target);
			instance->target = 1;
		}

		/*
		 * enable fan when temperature exceeds trip_temp and disable
		 * the fan in case it falls below trip_temp minus hysteresis
		 */
		if (instance->target == 0 && tz->temperature >= trip_temp)
			instance->target = 1;
		else if (instance->target == 1 &&
				tz->temperature <= trip_temp - trip_hyst)
			instance->target = 0;

		dev_dbg(&instance->cdev->device, "target=%d\n",
					(int)instance->target);

		mutex_lock(&instance->cdev->lock);
		instance->cdev->updated = false; /* cdev needs update */
		mutex_unlock(&instance->cdev->lock);
	}

	mutex_unlock(&tz->lock);
}

thermal_zone_trip_update函数做了以下几件事情:

1)获取触发温度

通过tz->ops->get_trip_temp(tz, trip, &trip_temp)获取所属thermal zone的trip temp。

2)获取滞回区间

如果所属thermal zone提供了获取滞回的函数,则调用此函数tz->ops->get_trip_hyst(tz, trip, &trip_hyst);否则滞回区间默认为0。

3)设置初始状态以及异常状态下的值

遍历该thermal zone下的各个instance,将初始状态下(instance->target为THERMAL_NO_TARGET)的值由THERMAL_NO_TARGET设置为0。THERMAL_NO_TARGET为宏定义,在drivers/thermal/thermal_core.h中,如下所示:

/*
 * Initial state of a cooling device during binding.
 * -1UL evaluates to ULONG_MAX, because unsigned arithmetic wraps around.
 */
#define THERMAL_NO_TARGET -1UL

并且将那些风扇状态既不是关闭(instance->target != 0)又不是打开(instance->target != 1)的状态设置为打开(instance->target = 1)。

4)按照函数说明中的策略更新状态

对应代码如下:

/*
 * enable fan when temperature exceeds trip_temp and disable
 * the fan in case it falls below trip_temp minus hysteresis
 */
if (instance->target == 0 && tz->temperature >= trip_temp)
    instance->target = 1;
else if (instance->target == 1 &&
		tz->temperature <= trip_temp - trip_hyst)
    instance->target = 0;

这段代码就对应了函数说明中的规则:

若风扇并未运行并且温度达到或超过了trip_temp(触发点温度),则风扇打开;若风扇已在运行,温度必须降至(trip_temp - hyst)(触发点温度 - 滞回)或以下,风扇才能关闭。

不过此处还只是设置了instance->target的值,实际的操作风扇的动作要由之后的函数来完成。

5)设置instance的cdev状态

mutex_lock(&instance->cdev->lock);
instance->cdev->updated = false; /* cdev needs update */
mutex_unlock(&instance->cdev->lock);

将instance对应的cdev的updated标志设置为false,表示该cdev的状态需要更新。为接下来控制风扇动作的函数做准备。

(5)thermal_cdev_update

thermal_cdev_update函数在drivers/thermal/thermal_helpers.c中,代码如下:

/**
 * thermal_cdev_update - bring a cooling device up to date if it is stale
 * @cdev:	pointer to struct thermal_cooling_device
 *
 * With cdev->lock held: when cdev->updated is false, call
 * __thermal_cdev_update() and mark the device as updated.  A device that
 * is already marked updated is left untouched.
 */
void thermal_cdev_update(struct thermal_cooling_device *cdev)
{
	mutex_lock(&cdev->lock);

	/* Nothing to do for a device that is already up to date */
	if (cdev->updated)
		goto out_unlock;

	__thermal_cdev_update(cdev);
	cdev->updated = true;

out_unlock:
	mutex_unlock(&cdev->lock);
}
EXPORT_SYMBOL(thermal_cdev_update);

工作实际是交给了__thermal_cdev_update函数来完成。该函数就在它的上边,代码如下:

/*
 * __thermal_cdev_update - apply the highest requested state to a cooling device
 * @cdev: cooling device to update
 *
 * Scans every thermal instance attached to @cdev, skips those whose target
 * is still THERMAL_NO_TARGET, and passes the largest remaining target
 * (0 when there is none) to thermal_cdev_set_cur_state().  The chosen
 * state is also traced and logged at debug level.
 */
void __thermal_cdev_update(struct thermal_cooling_device *cdev)
{
	unsigned long deepest = 0;
	struct thermal_instance *pos;

	/* Make sure cdev enters the deepest cooling state */
	list_for_each_entry(pos, &cdev->thermal_instances, cdev_node) {
		dev_dbg(&cdev->device, "zone%d->target=%lu\n",
			pos->tz->id, pos->target);
		if (pos->target != THERMAL_NO_TARGET && pos->target > deepest)
			deepest = pos->target;
	}

	thermal_cdev_set_cur_state(cdev, deepest);

	trace_cdev_update(cdev, deepest);
	dev_dbg(&cdev->device, "set to state %lu\n", deepest);
}

thermal_cdev_set_cur_state函数又在__thermal_cdev_update函数的上边,代码如下:

/*
 * thermal_cdev_set_cur_state - program a cooling device and report the change
 * @cdev: cooling device to program
 * @target: cooling state to set
 *
 * Calls the device's set_cur_state operation directly.  Only when that
 * call returns zero are the state-update notification sent and the
 * cooling device statistics updated; on a non-zero return nothing else
 * happens.
 */
static void thermal_cdev_set_cur_state(struct thermal_cooling_device *cdev,
				       int target)
{
	if (!cdev->ops->set_cur_state(cdev, target)) {
		thermal_notify_cdev_state_update(cdev->id, target);
		thermal_cooling_device_stats_update(cdev, target);
	}
}

thermal_notify_cdev_state_update函数在drivers/thermal/thermal_netlink.c中,代码如下:

/*
 * thermal_notify_cdev_state_update - send a cooling-device state-update event
 * @cdev_id: id of the cooling device
 * @cdev_state: the state being reported
 *
 * Packs both values into a struct param and passes it to
 * thermal_genl_send_event() with THERMAL_GENL_EVENT_CDEV_STATE_UPDATE.
 *
 * Return: whatever thermal_genl_send_event() returns.
 */
int thermal_notify_cdev_state_update(int cdev_id, int cdev_state)
{
	struct param p = {
		.cdev_state = cdev_state,
		.cdev_id = cdev_id,
	};

	return thermal_genl_send_event(THERMAL_GENL_EVENT_CDEV_STATE_UPDATE,
				       &p);
}

thermal_netlink是不是很眼熟,在“当初”介绍thermal_init函数的时候分析过,此处用上了。正所谓“草蛇灰线,伏脉千里”。

thermal_cooling_device_stats_update函数在drivers/thermal/thermal_sysfs.c中,代码如下:

/*
 * thermal_cooling_device_stats_update - account for a cooling state change
 * @cdev: cooling device whose statistics are updated
 * @new_state: state the device has just been set to
 *
 * Does nothing when the device has no statistics attached or when
 * @new_state equals the recorded state.  Otherwise, with stats->lock held,
 * it updates the time-in-state bookkeeping, bumps the transition counter
 * for (old state -> @new_state), records @new_state and increments the
 * total number of transitions.
 */
void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
					 unsigned long new_state)
{
	struct cooling_dev_stats *stats = cdev->stats;

	if (!stats)
		return;

	spin_lock(&stats->lock);

	if (stats->state != new_state) {
		update_time_in_state(stats);
		/* trans_table is indexed as [from * max_states + to] */
		stats->trans_table[stats->state * stats->max_states + new_state]++;
		stats->state = new_state;
		stats->total_trans++;
	}

	spin_unlock(&stats->lock);
}

__thermal_cdev_update函数中的trace_cdev_update函数在include/trace/events/thermal.h中,代码如下:

/*
 * cdev_update trace event: takes a cooling device and a target state,
 * records the device's type string and the target, and prints them as
 * "type=<type> target=<target>".
 */
TRACE_EVENT(cdev_update,

	TP_PROTO(struct thermal_cooling_device *cdev, unsigned long target),

	TP_ARGS(cdev, target),

	TP_STRUCT__entry(
		__string(type, cdev->type)
		__field(unsigned long, target)
	),

	TP_fast_assign(
		__assign_str(type, cdev->type);
		__entry->target = target;
	),

	TP_printk("type=%s target=%lu", __get_str(type), __entry->target)
);

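后边这几个函数只列出源码了,不做详细分析。不过从thermal_cdev_set_cur_state函数可以看出:真正控制风扇打开或关闭的动作,是通过调用cooling device驱动提供的回调cdev->ops->set_cur_state同步完成的;只有在该回调返回0(成功)之后,才会通过thermal netlink发送THERMAL_GENL_EVENT_CDEV_STATE_UPDATE事件,并更新cooling device的状态统计信息。也就是说,通知机制只是把状态变化“告知”出去,对风扇的实际控制并不是由通知机制异步完成的。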

至此,bang_bang governor就全部分析完了。