#ifndef TASK_FINDER_VMA_C
#define TASK_FINDER_VMA_C

#include <linux/file.h>
#include <linux/list.h>
#include <linux/jhash.h>

#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/version.h>	/* LINUX_VERSION_CODE / KERNEL_VERSION used below */

#include "stp_helper_lock.h"

// __stp_tf_vma_lock protects the hash table.
// Documentation/spinlocks.txt suggests we can be a bit more clever
// if we guarantee that in interrupt context we only read, not write,
// the data structures. We should never change the hash table or its
// contents in interrupt context (which should only ever call
// stap_find_vma_map_info for getting stored vma info). So we might
// want to look into that if this seems a bottleneck.
static STP_DEFINE_RWLOCK(__stp_tf_vma_lock);

#define __STP_TF_HASH_BITS 4
#define __STP_TF_TABLE_SIZE (1 << __STP_TF_HASH_BITS)
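
// The vma map is a fixed-size hash table of 1 << __STP_TF_HASH_BITS
// (= 16) buckets, indexed by a hash of the task's pid (see
// __stp_tf_vma_map_hash() below).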

#ifndef TASK_FINDER_VMA_ENTRY_PATHLEN
#define TASK_FINDER_VMA_ENTRY_PATHLEN 64
#elif TASK_FINDER_VMA_ENTRY_PATHLEN < 8
#error "gimme a little more TASK_FINDER_VMA_ENTRY_PATHLEN"
#endif


struct __stp_tf_vma_entry {
	struct hlist_node hlist;

	pid_t pid;
	unsigned long vm_start;
	unsigned long vm_end;
	char path[TASK_FINDER_VMA_ENTRY_PATHLEN]; /* mmpath name, if known */

	// User data (possibly stp_module)
	void *user;
};

static struct hlist_head *__stp_tf_vma_map;

// __stp_tf_vma_new_entry(): Returns a newly allocated entry, or NULL
// on allocation failure.
// Must only be called from user context.
// ... except, with inode-uprobes / task-finder2, it can be called from
// random tracepoints.  So we cannot sleep after all.
static struct __stp_tf_vma_entry *
__stp_tf_vma_new_entry(void)
{
	struct __stp_tf_vma_entry *entry;
	size_t size = sizeof (struct __stp_tf_vma_entry);
#ifdef CONFIG_UTRACE
	entry = (struct __stp_tf_vma_entry *) _stp_kmalloc_gfp(size,
                                                         STP_ALLOC_SLEEP_FLAGS);
#else
	entry = (struct __stp_tf_vma_entry *) _stp_kmalloc_gfp(size,
                                                               STP_ALLOC_FLAGS);
#endif
	return entry;
}

// __stp_tf_vma_release_entry(): Frees an entry.
static void
__stp_tf_vma_release_entry(struct __stp_tf_vma_entry *entry)
{
	_stp_kfree (entry);
}

// stap_initialize_vma_map():  Allocates and initializes the vma map
// hash table.  Should be called before any of the other stap_*_vma_map
// functions.  Since this runs before any other function is called, it
// doesn't need any locking.  Should be called from user context since
// it can allocate memory.
static int
stap_initialize_vma_map(void)
{
	size_t size = sizeof(struct hlist_head) * __STP_TF_TABLE_SIZE;
	struct hlist_head *map = (struct hlist_head *) _stp_kzalloc_gfp(size,
							STP_ALLOC_SLEEP_FLAGS);
	if (map == NULL)
		return -ENOMEM;

	__stp_tf_vma_map = map;
	return 0;
}

// stap_destroy_vma_map(): Unconditionally destroys vma entries.
// Nothing should be using it anymore. Doesn't lock anything and just
// frees all items.
static void
stap_destroy_vma_map(void)
{
	if (__stp_tf_vma_map != NULL) {
		int i;
		for (i = 0; i < __STP_TF_TABLE_SIZE; i++) {
			struct hlist_head *head = &__stp_tf_vma_map[i];
			struct hlist_node *node;
			struct hlist_node *n;
			struct __stp_tf_vma_entry *entry = NULL;

			if (hlist_empty(head))
				continue;

			stap_hlist_for_each_entry_safe(entry, node, n, head, hlist) {
				hlist_del(&entry->hlist);
				__stp_tf_vma_release_entry(entry);
			}
		}
		_stp_kfree(__stp_tf_vma_map);
	}
}
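
// Typical lifecycle sketch (hypothetical call sites): the map is set up
// once at module initialization time and torn down at shutdown, with the
// stap_*_vma_map_info() functions used in between:
//
//	if (stap_initialize_vma_map() != 0)
//		return -ENOMEM;
//	/* ... add/find/remove vma map entries while the module runs ... */
//	stap_destroy_vma_map();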


// __stp_tf_vma_map_hash(): Compute the vma map hash.
static inline u32
__stp_tf_vma_map_hash(struct task_struct *tsk)
{
	return (jhash_1word(tsk->pid, 0) & (__STP_TF_TABLE_SIZE - 1));
}

// Get vma_entry if the vma is present in the vma map hash table.
// Returns NULL if not present.  The __stp_tf_vma_lock must be read locked
// before calling this function.
static struct __stp_tf_vma_entry *
__stp_tf_get_vma_map_entry_internal(struct task_struct *tsk,
				    unsigned long vm_start)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct __stp_tf_vma_entry *entry;

	head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
	stap_hlist_for_each_entry(entry, node, head, hlist) {
		if (tsk->pid == entry->pid
		    && vm_start == entry->vm_start) {
			return entry;
		}
	}
	return NULL;
}

// Get vma_entry if the vma with the given vm_end is present in the vma map
// hash table for the tsk.  Returns NULL if not present.
// The __stp_tf_vma_lock must be read locked before calling this function.
static struct __stp_tf_vma_entry *
__stp_tf_get_vma_map_entry_end_internal(struct task_struct *tsk,
					unsigned long vm_end)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct __stp_tf_vma_entry *entry;

	head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
	stap_hlist_for_each_entry(entry, node, head, hlist) {
		if (tsk->pid == entry->pid
		    && vm_end == entry->vm_end) {
			return entry;
		}
	}
	return NULL;
}


// Add the vma info to the vma map hash table.
// Caller is responsible for name lifetime.
// Can allocate memory, so needs to be called
// only from user context.
static int
stap_add_vma_map_info(struct task_struct *tsk,
		      unsigned long vm_start, unsigned long vm_end,
		      const char *path, void *user)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct __stp_tf_vma_entry *entry;
	struct __stp_tf_vma_entry *new_entry;
	unsigned long flags;

	// Take a write lock, since we are most likely going to write
	// after reading. But reserve a new entry first outside the lock.
	new_entry = __stp_tf_vma_new_entry();
	stp_write_lock_irqsave(&__stp_tf_vma_lock, flags);
	entry = __stp_tf_get_vma_map_entry_internal(tsk, vm_start);
	if (entry != NULL) {
		stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
		if (new_entry)
			__stp_tf_vma_release_entry(new_entry);
		return -EBUSY;	/* Already there */
	}

	if (!new_entry) {
		stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
		return -ENOMEM;
	}

	// Fill in the info
	entry = new_entry;
	entry->pid = tsk->pid;
	entry->vm_start = vm_start;
	entry->vm_end = vm_end;
	// If the path doesn't fit, keep only its tail: a "..." prefix
	// followed by the last TASK_FINDER_VMA_ENTRY_PATHLEN-4 characters
	// of the path (plus the terminating NUL).
	if (strlen(path) >= TASK_FINDER_VMA_ENTRY_PATHLEN-3) {
		strlcpy(entry->path, "...", TASK_FINDER_VMA_ENTRY_PATHLEN);
		strlcpy(entry->path+3,
			&path[strlen(path)-TASK_FINDER_VMA_ENTRY_PATHLEN+4],
			TASK_FINDER_VMA_ENTRY_PATHLEN-3);
	} else {
		strlcpy(entry->path, path, TASK_FINDER_VMA_ENTRY_PATHLEN);
	}
	entry->user = user;

	head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
	hlist_add_head(&entry->hlist, head);
	stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
	return 0;
}
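
// A minimal usage sketch (hypothetical caller and illustrative values;
// the real callers live in the task-finder glue code): record a new
// mapping on an mmap-style event, and drop it again when it goes away:
//
//	if (stap_add_vma_map_info(tsk, vm_start, vm_end,
//				  "/usr/lib64/libc.so.6", NULL) == -EBUSY) {
//		/* an entry starting at vm_start already exists */
//	}
//	/* ... later ... */
//	stap_remove_vma_map_info(tsk, vm_start);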

// Extend the vma info vm_end in the vma map hash table if there is already
// a vma_info which ends precisely where this new one starts for the given
// task. Returns zero on success, -ESRCH if no existing matching entry could
// be found.
static int
stap_extend_vma_map_info(struct task_struct *tsk,
			 unsigned long vm_start, unsigned long vm_end)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct __stp_tf_vma_entry *entry;

	unsigned long flags;
	int res = -ESRCH; // Entry not there or doesn't match.

	// Take a write lock, since we are most likely going to write
	// to the entry after reading, if its vm_end matches our vm_start.
	stp_write_lock_irqsave(&__stp_tf_vma_lock, flags);
	entry = __stp_tf_get_vma_map_entry_end_internal(tsk, vm_start);
	if (entry != NULL) {
		entry->vm_end = vm_end;
		res = 0;
	}
	stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
	return res;
}


// Remove the vma entry from the vma hash table.
// Returns -ESRCH if the entry isn't present.
static int
stap_remove_vma_map_info(struct task_struct *tsk, unsigned long vm_start)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct __stp_tf_vma_entry *entry;
	int rc = -ESRCH;

	// Take a write lock since we are most likely going to delete
	// after reading.
	unsigned long flags;
	stp_write_lock_irqsave(&__stp_tf_vma_lock, flags);
	entry = __stp_tf_get_vma_map_entry_internal(tsk, vm_start);
	if (entry != NULL) {
		hlist_del(&entry->hlist);
		__stp_tf_vma_release_entry(entry);
		rc = 0;
	}
	stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
	return rc;
}

// Finds vma info if the vma is present in the vma map hash table for
// a given task and address (between vm_start and vm_end).
// Returns -ESRCH if not present.  The __stp_tf_vma_lock must *not* be
// locked before calling this function.
static int
stap_find_vma_map_info(struct task_struct *tsk, unsigned long addr,
		       unsigned long *vm_start, unsigned long *vm_end,
		       const char **path, void **user)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct __stp_tf_vma_entry *entry;
	struct __stp_tf_vma_entry *found_entry = NULL;
	int rc = -ESRCH;
	unsigned long flags;

	if (__stp_tf_vma_map == NULL)
		return rc;

	stp_read_lock_irqsave(&__stp_tf_vma_lock, flags);
	head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
	stap_hlist_for_each_entry(entry, node, head, hlist) {
		if (tsk->pid == entry->pid
		    && addr >= entry->vm_start
		    && addr < entry->vm_end) {
			found_entry = entry;
			break;
		}
	}
	if (found_entry != NULL) {
		if (vm_start != NULL)
			*vm_start = found_entry->vm_start;
		if (vm_end != NULL)
			*vm_end = found_entry->vm_end;
		if (path != NULL)
			*path = found_entry->path;
		if (user != NULL)
			*user = found_entry->user;
		rc = 0;
	}
	stp_read_unlock_irqrestore(&__stp_tf_vma_lock, flags);
	return rc;
}
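
// Lookup sketch (hypothetical caller): given a task and an address,
// retrieve the containing mapping; any of the out-pointers may be NULL
// if the caller doesn't need that field:
//
//	unsigned long start, end;
//	const char *path;
//	if (stap_find_vma_map_info(tsk, addr, &start, &end, &path, NULL) == 0) {
//		/* addr falls inside [start, end) of the mapping named 'path' */
//	}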

// Finds vma info if the vma is present in the vma map hash table for
// a given task with the given user handle.
// Returns -ESRCH if not present.  The __stp_tf_vma_lock must *not* be
// locked before calling this function.
static int
stap_find_vma_map_info_user(struct task_struct *tsk, void *user,
			    unsigned long *vm_start, unsigned long *vm_end,
			    const char **path)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct __stp_tf_vma_entry *entry;
	struct __stp_tf_vma_entry *found_entry = NULL;
	int rc = -ESRCH;
	unsigned long flags;

	if (__stp_tf_vma_map == NULL)
		return rc;

	stp_read_lock_irqsave(&__stp_tf_vma_lock, flags);
	head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
	stap_hlist_for_each_entry(entry, node, head, hlist) {
		if (tsk->pid == entry->pid
		    && user == entry->user) {
			found_entry = entry;
			break;
		}
	}
	if (found_entry != NULL) {
		if (vm_start != NULL)
			*vm_start = found_entry->vm_start;
		if (vm_end != NULL)
			*vm_end = found_entry->vm_end;
		if (path != NULL)
			*path = found_entry->path;
		rc = 0;
	}
	stp_read_unlock_irqrestore(&__stp_tf_vma_lock, flags);
	return rc;
}

// stap_drop_vma_maps(): Removes all vma map entries belonging to the
// given task.  Grabs the write lock.
static int
stap_drop_vma_maps(struct task_struct *tsk)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct hlist_node *n;
	struct __stp_tf_vma_entry *entry;

	unsigned long flags;
	stp_write_lock_irqsave(&__stp_tf_vma_lock, flags);
	head = &__stp_tf_vma_map[__stp_tf_vma_map_hash(tsk)];
	stap_hlist_for_each_entry_safe(entry, node, n, head, hlist) {
		if (tsk->pid == entry->pid) {
			hlist_del(&entry->hlist);
			__stp_tf_vma_release_entry(entry);
		}
	}
	stp_write_unlock_irqrestore(&__stp_tf_vma_lock, flags);
	return 0;
}

/*
 * stap_find_exe_file - acquire a reference to the mm's executable file
 *
 * Returns NULL if mm has no associated executable file.  User must
 * release file via fput().
 */
static struct file*
stap_find_exe_file(struct mm_struct* mm)
{
	// The following kernel commit changed the way the exported
	// get_mm_exe_file() works. This commit first appears in the
	// 4.1 kernel:
	//
	// commit 90f31d0ea88880f780574f3d0bb1a227c4c66ca3
	// Author: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
	// Date:   Thu Apr 16 12:47:56 2015 -0700
	// 
	//     mm: rcu-protected get_mm_exe_file()
	//     
	//     This patch removes mm->mmap_sem from mm->exe_file read side.
	//     Also it kills dup_mm_exe_file() and moves exe_file
	//     duplication into dup_mmap() where both mmap_sems are
	//     locked.
	//
	// So, for kernels >= 4.1, we'll use get_mm_exe_file(). For
	// kernels < 4.1 but with get_mm_exe_file() exported, we'll
	// still use our own code. The original get_mm_exe_file() can
	// sleep (since it calls down_read()), so we'll have to roll
	// our own.
#if defined(STAPCONF_DPATH_PATH) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0))
	return get_mm_exe_file(mm);
#else
	struct file *exe_file = NULL;

	// The down_read() function can sleep, so we'll call
	// down_read_trylock() instead, which can fail.  If it
	// fails, we'll just pretend this task didn't have a
	// exe file.
	if (mm && down_read_trylock(&mm->mmap_sem)) {

		// VM_EXECUTABLE was killed in kernel commit e9714acf,
		// but in kernels that new we can just use
		// mm->exe_file anyway. (PR14712)
#ifdef VM_EXECUTABLE
		struct vm_area_struct *vma;
		for (vma = mm->mmap; vma; vma = vma->vm_next) {
			if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) {
				exe_file = vma->vm_file;
				break;
			}
		}
#else
		exe_file = mm->exe_file;
#endif
		if (exe_file)
			get_file(exe_file);
		up_read(&mm->mmap_sem);
	}
	return exe_file;
#endif
}
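
// Usage sketch (hypothetical caller; 'tsk' stands in for whatever task
// pointer the caller holds): the returned reference, if any, must be
// released with fput() once the caller is done with it:
//
//	struct file *exe = stap_find_exe_file(tsk->mm);
//	if (exe) {
//		/* e.g. resolve its path with d_path() */
//		fput(exe);
//	}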

#endif /* TASK_FINDER_VMA_C */
