zl程序教程

您现在的位置是:首页 >  系统

当前栏目

Linux内核Thermal框架详解十三、Thermal Governor(3)

Linux内核框架 详解 十三
2023-09-14 09:09:19 时间

接前一篇文章Linux内核Thermal框架详解十二、Thermal Governor(2)

二、具体温控策略

上一篇文章介绍并详细分析了bang_bang governor的源码。本文介绍第2种温控策略:fair_share。

2. fair_share

fair_share governor总的策略是频率档位⽐较多的cooling device优先降频。

fair_share governor的代码在drivers/thermal/gov_fair_share.c中,也很简短,一共才124行,有效代码不到100行。如下所示:

#include <linux/thermal.h>
#include <trace/events/thermal.h>

#include "thermal_core.h"

/**
 * get_trip_level: - obtains the current trip level for a zone
 * @tz:		thermal zone device
 */
static int get_trip_level(struct thermal_zone_device *tz)
{
	int count = 0;
	int trip_temp;
	enum thermal_trip_type trip_type;

	if (tz->trips == 0 || !tz->ops->get_trip_temp)
		return 0;

	for (count = 0; count < tz->trips; count++) {
		tz->ops->get_trip_temp(tz, count, &trip_temp);
		if (tz->temperature < trip_temp)
			break;
	}

	/*
	 * count > 0 only if temperature is greater than first trip
	 * point, in which case, trip_point = count - 1
	 */
	if (count > 0) {
		tz->ops->get_trip_type(tz, count - 1, &trip_type);
		trace_thermal_zone_trip(tz, count - 1, trip_type);
	}

	return count;
}

static long get_target_state(struct thermal_zone_device *tz,
		struct thermal_cooling_device *cdev, int percentage, int level)
{
	unsigned long max_state;

	cdev->ops->get_max_state(cdev, &max_state);

	return (long)(percentage * level * max_state) / (100 * tz->trips);
}

/**
 * fair_share_throttle - throttles devices associated with the given zone
 * @tz: thermal_zone_device
 * @trip: trip point index
 *
 * Throttling Logic: This uses three parameters to calculate the new
 * throttle state of the cooling devices associated with the given zone.
 *
 * Parameters used for Throttling:
 * P1. max_state: Maximum throttle state exposed by the cooling device.
 * P2. percentage[i]/100:
 *	How 'effective' the 'i'th device is, in cooling the given zone.
 * P3. cur_trip_level/max_no_of_trips:
 *	This describes the extent to which the devices should be throttled.
 *	We do not want to throttle too much when we trip a lower temperature,
 *	whereas the throttling is at full swing if we trip critical levels.
 *	(Heavily assumes the trip points are in ascending order)
 * new_state of cooling device = P3 * P2 * P1
 */
static int fair_share_throttle(struct thermal_zone_device *tz, int trip)
{
	struct thermal_instance *instance;
	int total_weight = 0;
	int total_instance = 0;
	int cur_trip_level = get_trip_level(tz);

	mutex_lock(&tz->lock);

	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		if (instance->trip != trip)
			continue;

		total_weight += instance->weight;
		total_instance++;
	}

	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		int percentage;
		struct thermal_cooling_device *cdev = instance->cdev;

		if (instance->trip != trip)
			continue;

		if (!total_weight)
			percentage = 100 / total_instance;
		else
			percentage = (instance->weight * 100) / total_weight;

		instance->target = get_target_state(tz, cdev, percentage,
						    cur_trip_level);

		mutex_lock(&cdev->lock);
		__thermal_cdev_update(cdev);
		mutex_unlock(&cdev->lock);
	}

	mutex_unlock(&tz->lock);
	return 0;
}

static struct thermal_governor thermal_gov_fair_share = {
	.name		= "fair_share",
	.throttle	= fair_share_throttle,
};
THERMAL_GOVERNOR_DECLARE(thermal_gov_fair_share);

同样是麻雀虽小,五脏俱全。别看代码行数比较少,但是背后的机制却并不简单。一段一段来进行分析。

(1)THERMAL_GOVERNOR_DECLARE相关代码

先来看THERMAL_GOVERNOR_DECLARE。它是一个宏定义,在drivers/thermal/thermal_core.h中,代码如下:

/* Init section thermal table */
extern struct thermal_governor *__governor_thermal_table[];
extern struct thermal_governor *__governor_thermal_table_end[];
 
#define THERMAL_TABLE_ENTRY(table, name)			\
	static typeof(name) *__thermal_table_entry_##name	\
	__used __section("__" #table "_thermal_table") = &name
 
#define THERMAL_GOVERNOR_DECLARE(name)	THERMAL_TABLE_ENTRY(governor, name)

实际上这段代码在前文Linux内核Thermal框架详解四、Thermal Core(3)中已经进行了详细分析,这里就不再赘述了。不过为了便于理解和加深印象,将fair_share governor展开后的代码再次列出:

static struct thermal_governor thermal_gov_fair_share = {
	.name		= "fair_share",
	.throttle	= fair_share_throttle,
};

static struct thermal_governor *__thermal_table_entry_thermal_gov_fair_share    \
    __used __section("__governor_thermal_table") = &thermal_gov_fair_share

Thermal Governor都是通过THERMAL_GOVERNOR_DECLARE定义到了__governor_thermal_table这段空间内。然后在thermal core初始化时通过调用thermal_register_governors来注册到thermal_governor_list链表中。再之后通过经由“thermal_init->thermal_register_governors-> thermal_set_governor”路径和thermal zone device关联上。

(2)handle_non_critical_trips

struct thermal_governor中有一个成员throttle,其是一个函数指针:

int (*throttle)(struct thermal_zone_device *tz, int trip);

对于对象thermal_gov_fair_share来说,指向了fair_share_throttle函数。在解析fair_share_throttle函数之前,有一个问题必须弄清楚:这个函数是何时被调用的?

是在drivers/thermal/thermal_core.c的handle_non_critical_trips函数中,代码如下:

static void handle_non_critical_trips(struct thermal_zone_device *tz, int trip)
{
	tz->governor ? tz->governor->throttle(tz, trip) :
		       def_governor->throttle(tz, trip);
}

那么又是哪里调用的handle_non_critical_trips?是在drivers/thermal/thermal_core.c的handle_thermal_trip函数中,代码如下:

static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)
{
	enum thermal_trip_type type;
	int trip_temp, hyst = 0;

	/* Ignore disabled trip points */
	if (test_bit(trip, &tz->trips_disabled))
		return;

	tz->ops->get_trip_temp(tz, trip, &trip_temp);
	tz->ops->get_trip_type(tz, trip, &type);
	if (tz->ops->get_trip_hyst)
		tz->ops->get_trip_hyst(tz, trip, &hyst);

	if (tz->last_temperature != THERMAL_TEMP_INVALID) {
		if (tz->last_temperature < trip_temp &&
		    tz->temperature >= trip_temp)
			thermal_notify_tz_trip_up(tz->id, trip,
						  tz->temperature);
		if (tz->last_temperature >= trip_temp &&
		    tz->temperature < (trip_temp - hyst))
			thermal_notify_tz_trip_down(tz->id, trip,
						    tz->temperature);
	}

	if (type == THERMAL_TRIP_CRITICAL || type == THERMAL_TRIP_HOT)
		handle_critical_trips(tz, trip, type);
	else
		handle_non_critical_trips(tz, trip);
	/*
	 * Alright, we handled this trip successfully.
	 * So, start monitoring again.
	 */
	monitor_thermal_zone(tz);
}

对于handle_thermal_trip函数的详细分析有专门的文章章节,由于本篇文章专注于fair_share governor,故在此不深入展开。

(3)fair_share_throttle

再贴一下此函数代码:

/**
 * fair_share_throttle - throttles devices associated with the given zone
 * @tz: thermal_zone_device
 * @trip: trip point index
 *
 * Throttling Logic: This uses three parameters to calculate the new
 * throttle state of the cooling devices associated with the given zone.
 *
 * Parameters used for Throttling:
 * P1. max_state: Maximum throttle state exposed by the cooling device.
 * P2. percentage[i]/100:
 *	How 'effective' the 'i'th device is, in cooling the given zone.
 * P3. cur_trip_level/max_no_of_trips:
 *	This describes the extent to which the devices should be throttled.
 *	We do not want to throttle too much when we trip a lower temperature,
 *	whereas the throttling is at full swing if we trip critical levels.
 *	(Heavily assumes the trip points are in ascending order)
 * new_state of cooling device = P3 * P2 * P1
 */
static int fair_share_throttle(struct thermal_zone_device *tz, int trip)
{
	struct thermal_instance *instance;
	int total_weight = 0;
	int total_instance = 0;
	int cur_trip_level = get_trip_level(tz);

	mutex_lock(&tz->lock);

	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		if (instance->trip != trip)
			continue;

		total_weight += instance->weight;
		total_instance++;
	}

	list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
		int percentage;
		struct thermal_cooling_device *cdev = instance->cdev;

		if (instance->trip != trip)
			continue;

		if (!total_weight)
			percentage = 100 / total_instance;
		else
			percentage = (instance->weight * 100) / total_weight;

		instance->target = get_target_state(tz, cdev, percentage,
						    cur_trip_level);

		mutex_lock(&cdev->lock);
		__thermal_cdev_update(cdev);
		mutex_unlock(&cdev->lock);
	}

	mutex_unlock(&tz->lock);
	return 0;
}

函数注释已经将函数功能说得很清楚了:对与给定thermal zone关联的设备进行节流。调节逻辑如下:

使用3个参数计算与给定thermal zone相关联的冷却设备的最新throttle state。

用于节流的3个参数(注意不是函数的参数):

  • 参数1. max_state

冷却设备暴露的最大throttle state。

  • 参数2. percentage[i]/100

第i个设备在冷却给定区域方面的”有效性”。

  • 参数3. cur_trip_level/max_no_of_trips

这个参数描述设备应被节流的限度。当到达较低的温度时,不需要节流太多;反之如果在临界水平,节流就处于全开状态。在很大程度上假设跳闸点按升序排列。

new_state of cooling device = P3 * P2 * P1

代码的大致流程如下:

1)得到指定thermal zone的trip level

通过get_trip_level(tz)得到指定thermal zone的trip level。

get_trip_level函数在同文件(drivers/thermal/gov_fair_share.c)中实现,代码如下:

/**
 * get_trip_level: - obtains the current trip level for a zone
 * @tz:		thermal zone device
 */
static int get_trip_level(struct thermal_zone_device *tz)
{
	int count = 0;
	int trip_temp;
	enum thermal_trip_type trip_type;

	if (tz->trips == 0 || !tz->ops->get_trip_temp)
		return 0;

	for (count = 0; count < tz->trips; count++) {
		tz->ops->get_trip_temp(tz, count, &trip_temp);
		if (tz->temperature < trip_temp)
			break;
	}

	/*
	 * count > 0 only if temperature is greater than first trip
	 * point, in which case, trip_point = count - 1
	 */
	if (count > 0) {
		tz->ops->get_trip_type(tz, count - 1, &trip_type);
		trace_thermal_zone_trip(tz, count - 1, trip_type);
	}

	return count;
}

依次遍历各个触发点(trips),并得到相应触发点的温度。如果给定thermal zone的温度小于某一触发点的温度,则跳出循环。

未完待续……