zl程序教程

您现在的位置是:首页 >  系统

当前栏目

Linux 下获取进程所在文件的路径

Linux文件进程 获取 路径 所在
2023-09-11 14:18:25 时间

一、应用层

以top进程为例:

  PID USER      PR  NI    VIRT    RES    SHR S  %CPU %MEM     TIME+ COMMAND
21621 root      20   0  162148   2432   1588 R   0.3  0.0   0:00.98 top

top进程的pid为21621,可以通过查看 /proc/pid/exe:

[root@localhost c_test]# ls -l /proc/21621/exe
lrwxrwxrwx. 1 root root 0 Nov  2 21:28 /proc/21621/exe -> /usr/bin/top
[root@localhost c_test]# ls -l /proc/21621/cwd
lrwxrwxrwx. 1 root root 0 Nov  2 21:34 /proc/21621/cwd -> /root

其中 /proc/21621/exe 是一个符号链接,指向 top 进程对应的可执行文件(即进程所在文件的完整路径);/proc/21621/cwd 也是一个符号链接,指向该进程的当前工作目录。

直接调用readlink命令获取可执行文件的路径:

[root@localhost c_test]# readlink /proc/21621/exe
/usr/bin/top
NAME
       readlink - print resolved symbolic links or canonical file names
       
        Print value of a symbolic link or canonical file name
#include <stdio.h>
#include <unistd.h>
#include <linux/limits.h>

/*
 * Print the absolute path of the running executable, obtained by reading
 * the /proc/self/exe symbolic link.
 *
 * Returns 0 on success, -1 if the link cannot be read.
 */
int main()
{
    char task_absolute_path[PATH_MAX];

    /*
     * readlink() does not append a terminating NUL byte, so one byte of
     * the buffer is reserved and the string is terminated by hand below.
     */
    ssize_t cnt = readlink("/proc/self/exe", task_absolute_path,
                           sizeof(task_absolute_path) - 1);
    if (cnt < 0) {
        printf("readlink is error\n");
        return -1;
    }
    task_absolute_path[cnt] = '\0';

    printf("task absolute path:%s\n", task_absolute_path);

    return 0;
}

当然对于top这种系统shell命令,可以用which和whereis查看:

[root@localhost ~]# which top
/usr/bin/top
[root@localhost ~]# whereis top
top: /usr/bin/top /usr/share/man/man1/top.1.gz

对于普通程序通常用 /proc/pid/exe进行查看。

二、内核态获取

2.1 相对应的函数与结构体

struct fs_struct *fs 描述了文件系统和进程相关的信息:

// linux-3.10/include/linux/sched.h

/*
 * Excerpt: only the member discussed here is shown; each "......" line
 * stands for members that were left out.
 */
struct task_struct {
	......
	/* filesystem information */
	struct fs_struct *fs;
	......
}
// linux-3.10/include/linux/fs_struct.h

/* Filesystem context of a task, reached through task_struct->fs. */
struct fs_struct {
	int users;
	spinlock_t lock;
	seqcount_t seq;
	int umask;
	int in_exec;
	/*
	 * root: the root directory as seen by the process (a subdirectory
	 *       rather than the global "/" once chroot has been used).
	 * pwd:  the current working directory.
	 */
	struct path root, pwd;
};

其中 struct path root 表示根目录路径,通常都是 / 目录,但是通过chroot系统调用后,对于进程来说会将 / 目录变成了某个子目录,那么相应的进程就是使用该子目录而不是全局的根目录,该进程会将该子目录当作其根目录。

chroot - run command or interactive shell with special root directory

Run COMMAND with root directory set to NEWROOT.

struct path pwd就是当前工作目录。

// linux-3.10/include/linux/path.h

/* A filesystem location expressed as a (mount, dentry) pair. */
struct path {
	struct vfsmount *mnt;
	struct dentry *dentry;
};
// linux-3.10/include/linux/dcache.h

/*
 * "quick string" -- eases parameter passing, but more importantly
 * saves "metadata" about the string (ie length and the hash).
 *
 * hash comes first so it snuggles against d_parent in the
 * dentry.
 *
 * Excerpt: the "......" line stands for the members left out here
 * (the length and hash mentioned above).
 */
struct qstr {
	......
	/* The string bytes themselves. */
	const unsigned char *name;
};

/*
 * Excerpt: only the name member is shown; each "......" line stands for
 * members that were left out.
 */
struct dentry {
	......
	/* Name of this entry, stored as a qstr. */
	struct qstr d_name;
	......
}
// linux-3.10/include/linux/sched.h

/*
 * Excerpt: only the member discussed here is shown; each "......" line
 * stands for members that were left out.
 */
struct task_struct {
	......
	/* Starting point for reaching the executable: mm->exe_file->f_path. */
	struct mm_struct *mm;
	......
}

从task_struct获取路径基本通过mm_struct这个结构,从中可以获取进程全路径。

// 获取进程全路径
task_struct->mm->exe_file->f_path

进程所在文件的信息保存在 mm_struct 的 exe_file 成员中,/proc/<pid>/exe 符号链接指向的就是该文件:

// linux-3.10/include/linux/mm_types.h

/*
 * Excerpt: only the member discussed here is shown; each "......" line
 * stands for members that were left out.
 */
struct mm_struct {
	......
	/* store ref to file /proc/<pid>/exe symlink points to */
	struct file *exe_file;
	......
}
// linux-3.10/include/linux/fs.h

/*
 * Excerpt: only the member discussed here is shown; each "......" line
 * stands for members that were left out.
 */
struct file {
	......
	/* Path (mount + dentry) of this file. */
	struct path		f_path;
	......
}

(1) 通过dentry_path_raw获取文件的全路径,低版本比如2.6.32没有该API

// linux-3.10/fs/dcache.c

/*
 * Copy @namelen bytes of @str immediately in front of *@buffer and move
 * *@buffer back to the start of the copied bytes.
 *
 * *@buflen is the space still available in front of *@buffer; it is
 * reduced by @namelen even when the copy is refused.
 *
 * Returns 0 on success, or -ENAMETOOLONG (leaving *@buffer untouched) when
 * fewer than @namelen bytes are available.
 */
static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
	int remaining = *buflen - namelen;
	char *dst;

	/* The budget is charged before the check, so it can go negative. */
	*buflen = remaining;
	if (remaining < 0)
		return -ENAMETOOLONG;

	dst = *buffer - namelen;
	*buffer = dst;
	memcpy(dst, str, namelen);
	return 0;
}

/*
 * Prepend the bytes of the qstr @name in front of *@buffer.
 * Thin wrapper around prepend(); returns whatever prepend() returns.
 */
static int prepend_name(char **buffer, int *buflen, struct qstr *name)
{
	int status;

	status = prepend(buffer, buflen, name->name, name->len);
	return status;
}

/*
 * Write full pathname from the root of the filesystem into the buffer.
 *
 * The path is assembled backwards: the terminating NUL goes into the last
 * byte of @buf, then each name component (preceded by a '/') is placed in
 * front of what has been written so far while walking from @dentry up
 * through d_parent.
 *
 * Returns a pointer to the first character of the path inside @buf, or
 * ERR_PTR(-ENAMETOOLONG) when @buflen is too small.
 */
static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
{
	char *end = buf + buflen;
	char *retval;

	/* Terminating NUL in the last byte of the buffer. */
	prepend(&end, &buflen, "\0", 1);
	/* Need at least one more byte for the '/' written just below. */
	if (buflen < 1)
		goto Elong;
	/* Get '/' right */
	/*
	 * If @dentry is already the root, the loop below never runs and this
	 * lone "/" is the result.
	 */
	retval = end-1;
	*retval = '/';

	/* Walk towards the root, one component per iteration. */
	while (!IS_ROOT(dentry)) {
		struct dentry *parent = dentry->d_parent;
		int error;

		prefetch(parent);
		/* d_lock is held only while this dentry's name is copied. */
		spin_lock(&dentry->d_lock);
		error = prepend_name(&end, &buflen, &dentry->d_name);
		spin_unlock(&dentry->d_lock);
		/* Name first, then the '/' separator in front of it. */
		if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
			goto Elong;

		/* The path now starts at the '/' that was just written. */
		retval = end;
		dentry = parent;
	}
	return retval;
Elong:
	return ERR_PTR(-ENAMETOOLONG);
}

/*
 * Build the path of @dentry, from the root of its filesystem, into @buf.
 *
 * Runs __dentry_path() with rename_lock taken via write_seqlock()
 * (presumably so that renames cannot change the ancestry during the walk).
 *
 * Returns __dentry_path()'s result: a pointer into @buf where the path
 * starts, or ERR_PTR(-ENAMETOOLONG) if @buflen is too small. Callers must
 * use the returned pointer rather than @buf.
 */
char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
{
	char *retval;

	write_seqlock(&rename_lock);
	retval = __dentry_path(dentry, buf, buflen);
	write_sequnlock(&rename_lock);

	return retval;
}
EXPORT_SYMBOL(dentry_path_raw);
struct file *filp;
dentry_path_raw(filp->f_path.dentry,buf,buflen);

(2)通过d_path获取文件的全路径

// linux-3.10/fs/dcache.c

/**
 * d_path - return the path of a dentry
 * @path: path to report
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name. If the entry has been deleted
 * the string " (deleted)" is appended. Note that this is ambiguous.
 *
 * Returns a pointer into the buffer or an error code if the path was
 * too long. Note: Callers should use the returned pointer, not the passed
 * in buffer, to use the name! The implementation often starts at an offset
 * into the buffer, and may leave 0 bytes at the start.
 *
 * "buflen" should be positive.
 */
char *d_path(const struct path *path, char *buf, int buflen)
{
	/* Initial position is one past the end of the buffer. */
	char *res = buf + buflen;
	struct path root;
	int error;

	/*
	 * We have various synthetic filesystems that never get mounted.  On
	 * these filesystems dentries are never used for lookup purposes, and
	 * thus don't need to be hashed.  They also don't need a name until a
	 * user wants to identify the object in /proc/pid/fd/.  The little hack
	 * below allows us to generate a name for these objects on demand:
	 */
	if (path->dentry->d_op && path->dentry->d_op->d_dname)
		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);

	/* Root of the calling task; paired with path_put(&root) below. */
	get_fs_root(current->fs, &root);
	/*
	 * vfsmount_lock (read side) and rename_lock are held around the walk
	 * and released in reverse order.
	 */
	br_read_lock(&vfsmount_lock);
	write_seqlock(&rename_lock);
	/* res and buflen are passed by address and updated by the callee. */
	error = path_with_deleted(path, &root, &res, &buflen);
	write_sequnlock(&rename_lock);
	br_read_unlock(&vfsmount_lock);
	/* On failure the error code is returned encoded as a pointer. */
	if (error < 0)
		res = ERR_PTR(error);
	path_put(&root);
	return res;
}
EXPORT_SYMBOL(d_path);

调用d_path函数获取文件的路径时,应该使用返回的指针而不是传递进去的参数 buf 。
原因是该函数是从缓冲区的末尾向前填充路径的,返回的字符串通常起始于缓冲区内部的某个偏移处,而不是 buf 的起始位置。

内核中用到d_path的例子:

// linux-3.10/include/linux/mm_types.h

/*
 * This struct defines a memory VMM memory area. There is one of these
 * per VM-area/task.  A VM area is any part of the process virtual memory
 * space that has a special rule for the page-fault handlers (ie a shared
 * library, the executable area etc).
 *
 * Excerpt: only vm_file is shown; each "......" line stands for members
 * that were left out.
 */
struct vm_area_struct {
	......
	struct file * vm_file;		/* File we map to (can be NULL). */
	......
}
// linux-3.10/include/linux/fs.h

/*
 * Excerpt: only the member handed to d_path() is shown; each "......"
 * line stands for members that were left out.
 */
struct file {
	......
	/* Path (mount + dentry) of this file. */
	struct path		f_path;
	......
}
// linux-3.10/mm/memory.c

/*
 * Print the name of a VMA.
 *
 * Looks up the VMA of the current task that find_vma() returns for @ip and,
 * if it is file-backed, prints "@prefix<basename>[<start>+<size>]" where
 * <basename> is the last component of the mapped file's path.
 * Prints nothing when preempt_count() is non-zero, when no file-backed VMA
 * is found, or when the temporary page cannot be allocated.
 */
void print_vma_addr(char *prefix, unsigned long ip)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;

	/*
	 * Do not print if we are in atomic
	 * contexts (in exception stacks, etc.):
	 */
	if (preempt_count())
		return;

	/* mmap_sem is held for reading across the lookup and the print. */
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, ip);
	if (vma && vma->vm_file) {
		struct file *f = vma->vm_file;
		/*
		 * Allocate one physical page through the buddy-system interface;
		 * the call returns a kernel virtual address.
		 */
		char *buf = (char *)__get_free_page(GFP_KERNEL);
		if (buf) {
			char *p;

			/* Use the returned pointer p, not buf, as the path. */
			p = d_path(&f->f_path, buf, PAGE_SIZE);
			if (IS_ERR(p))
				p = "?";
			printk("%s%s[%lx+%lx]", prefix, kbasename(p),
					vma->vm_start,
					vma->vm_end - vma->vm_start);
			free_page((unsigned long)buf);
		}
	}
	up_read(&mm->mmap_sem);
}

2.2 API演示

在这里只是简单的给出怎么在内核态获取进程所在文件的路径,详细的话请参考内核源码,在第三节给出内核源码获取进程所在文件的路径的方法。

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/pid.h>
#include <linux/fs.h>
#include <linux/fs_struct.h>
#include <linux/path.h>

#define TASK_PATH_MAX_LENGTH 512

/*
 * Module init function.
 *
 * Logs, for the task that loads the module:
 *   - the name of its current working directory,
 *   - the name of its root directory,
 *   - the full path of its executable, obtained once with
 *     dentry_path_raw() and once with d_path().
 *
 * Always returns a negative value, so the module never stays loaded.
 * NOTE(review): the final "return -1" looks deliberate, since this demo
 * registers no module_exit handler -- confirm before changing it.
 */
static int __init lkm_init(void)
{
    struct qstr root_task_path;
    struct qstr current_task_path;
    struct mm_struct *mm = current->mm;
    struct file *exe_file;

    char buf_1[TASK_PATH_MAX_LENGTH] = {0};
    char *task_path_1 = NULL;

    char buf_2[TASK_PATH_MAX_LENGTH] = {0};
    char *task_path_2 = NULL;

    /* Name of the current working directory. */
    current_task_path = current->fs->pwd.dentry->d_name;
    /* Name of the root directory. */
    root_task_path = current->fs->root.dentry->d_name;

    /*
     * Kernel threads have no mm, and an mm may have no exe_file; bail out
     * instead of dereferencing a NULL pointer.
     */
    if (!mm || !mm->exe_file) {
        printk("Get path failed\n");
        return -ENOENT;
    }
    exe_file = mm->exe_file;

    /*
     * Full path of the executable via dentry_path_raw().
     * Kernel 2.6.32 does not provide this API.
     */
    task_path_1 = dentry_path_raw(exe_file->f_path.dentry, buf_1, TASK_PATH_MAX_LENGTH);
    if (IS_ERR(task_path_1)) {
        printk("Get path failed\n");
        return PTR_ERR(task_path_1);
    }

    /*
     * Full path of the executable via d_path().
     * The path must be read through the returned pointer (task_path_2),
     * not through the buffer that was passed in (buf_2).
     */
    task_path_2 = d_path(&exe_file->f_path, buf_2, TASK_PATH_MAX_LENGTH);
    if (IS_ERR(task_path_2)) {
        printk("Get path failed\n");
        return PTR_ERR(task_path_2);
    }

    printk("current path = %s\n", current_task_path.name);
    printk("root path = %s\n", root_task_path.name);
    printk("task_path_1 = %s\n", task_path_1);
    printk("task_path_2 = %s\n", task_path_2);

    return -1;
}

module_init(lkm_init);

MODULE_LICENSE("GPL");

结果展示:

[root@localhost task_path]# dmesg -c
[415299.952165] current path = task_path
[415299.952172] root path = /
[415299.952176] task_path_1 = /usr/bin/kmod
[415299.952179] task_path_2 = /usr/bin/kmod

三、内核源码实现

// linux-3.10/fs/proc/base.c

/* NOTE:
 *	Implementing inode permission operations in /proc is almost
 *	certainly an error.  Permission checks need to happen during
 *	each system call not at open time.  The reason is that most of
 *	what we wish to check for permissions in /proc varies at runtime.
 *
 *	The classic example of a problem is opening file descriptors
 *	in /proc for a task before it execs a suid executable.
 */

/*
 * One row of a per-task /proc entry table such as tid_base_stuff[]
 * (e.g. "fd", "comm", "cwd", "root", "exe").
 */
struct pid_entry {
	char *name;				/* entry name */
	int len;				/* presumably the length of name -- confirm */
	umode_t mode;				/* file mode of the entry */
	const struct inode_operations *iop;	/* inode operations for the entry */
	const struct file_operations *fop;	/* file operations for the entry */
	union proc_op op;			/* entry-specific callback */
};

/*
 * Resolve the target of a task's "exe" link.
 *
 * @dentry:   dentry of the /proc entry; its inode identifies the task.
 * @exe_path: filled with the path of the task's executable on success.
 *
 * Returns 0 on success with a reference taken on *@exe_path via path_get(),
 * or -ENOENT when the task, its mm, or its exe_file is not available.
 */
static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
{
	struct task_struct *task;
	struct mm_struct *mm;
	struct file *exe_file;

	task = get_proc_task(dentry->d_inode);
	if (!task)
		return -ENOENT;
	/* The task is only needed to reach its mm; release it right after. */
	mm = get_task_mm(task);
	put_task_struct(task);
	if (!mm)
		return -ENOENT;
	/* Likewise the mm is only needed to reach exe_file. */
	exe_file = get_mm_exe_file(mm);
	mmput(mm);
	if (exe_file) {
		/* Copy the path and take a reference on it before dropping the file. */
		*exe_path = exe_file->f_path;
		path_get(&exe_file->f_path);
		fput(exe_file);
		return 0;
	} else
		return -ENOENT;
}

/*
 * Tasks
 */
static const struct pid_entry tid_base_stuff[] = {
	DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
	......
	REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
	......
	LNK("cwd",       proc_cwd_link),
	LNK("root",      proc_root_link),
	LNK("exe",       proc_exe_link),
// linux-3.10/fs/proc/base.c

/*
 * Convert @path to a path string and copy it to the user buffer.
 *
 * @path:   path to report.
 * @buffer: user-space destination.
 * @buflen: size of @buffer; longer paths are truncated to this length.
 *
 * Returns the number of bytes copied, -ENOMEM if the temporary page cannot
 * be allocated, the error encoded in d_path()'s result, or -EFAULT if the
 * copy to user space fails.
 */
static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
{
	/*
	 * Exactly one page is needed, so kmalloc is not used: a physical page
	 * is taken through the buddy-system interface, which returns a kernel
	 * virtual address.
	 */
	char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
	char *pathname;
	int len;

	if (!tmp)
		return -ENOMEM;

	/* Get the path of the file the link refers to. */
	pathname = d_path(path, tmp, PAGE_SIZE);
	/* Pre-load len with the error code so the error exit returns it. */
	len = PTR_ERR(pathname);
	if (IS_ERR(pathname))
		goto out;
	/*
	 * Distance from the start of the path to the last byte of the page
	 * (presumably where the terminating NUL sits, making this the string
	 * length -- confirm against d_path()).
	 */
	len = tmp + PAGE_SIZE - 1 - pathname;

	/* Never copy more than the caller's buffer can hold. */
	if (len > buflen)
		len = buflen;
	if (copy_to_user(buffer, pathname, len))
		len = -EFAULT;
 out:
	free_page((unsigned long)tmp);
	return len;
}

/*
 * readlink handler for per-task /proc links (see
 * proc_pid_link_inode_operations).
 *
 * Checks access, resolves the link target through the inode's
 * proc_get_link callback, then copies the resulting path string to user
 * space with do_proc_readlink().
 *
 * Returns do_proc_readlink()'s result, -EACCES when access is denied, or
 * the error returned by the proc_get_link callback.
 */
static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
{
	int error = -EACCES;
	struct inode *inode = dentry->d_inode;
	struct path path;

	/* Are we allowed to snoop on the tasks file descriptors? */
	if (!proc_fd_access_allowed(inode))
		goto out;

	/* Entry-specific resolver, e.g. proc_exe_link for "exe". */
	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
	if (error)
		goto out;

	error = do_proc_readlink(&path, buffer, buflen);
	/* Release the path filled in by the proc_get_link callback. */
	path_put(&path);
out:
	return error;
}

/* Inode operations table that routes readlink to proc_pid_readlink(). */
const struct inode_operations proc_pid_link_inode_operations = {
	.readlink	= proc_pid_readlink,
	.follow_link	= proc_pid_follow_link,
	.setattr	= proc_setattr,
};


// linux-3.10/fs/proc/internal.h
/* Entry-specific callback of a /proc entry; exactly one member is in use. */
union proc_op {
	/* Resolve a link target into a struct path; returns 0 or an error. */
	int (*proc_get_link)(struct dentry *, struct path *);
	int (*proc_read)(struct task_struct *task, char *page);
	int (*proc_show)(struct seq_file *m,
		struct pid_namespace *ns, struct pid *pid,
		struct task_struct *task);
};

/*
 * procfs-private inode data. The generic inode is embedded as vfs_inode,
 * which is what lets PROC_I() get back here with container_of().
 */
struct proc_inode {
	struct pid *pid;
	int fd;
	union proc_op op;	/* callback used e.g. by proc_pid_readlink() */
	struct proc_dir_entry *pde;
	struct ctl_table_header *sysctl;
	struct ctl_table *sysctl_entry;
	struct proc_ns ns;
	struct inode vfs_inode;	/* embedded generic inode */
};

/*
 * General functions
 */

/* Map a generic inode back to the proc_inode that embeds it as vfs_inode. */
static inline struct proc_inode *PROC_I(const struct inode *inode)
{
	struct proc_inode *owner;

	owner = container_of(inode, struct proc_inode, vfs_inode);
	return owner;
}

参考资料

Linux 3.10

https://blog.csdn.net/qq_42931917/article/details/119803534
https://blog.csdn.net/cenziboy/article/details/8761621
https://blog.csdn.net/whatday/article/details/100638552