文章目录
前言
上一篇文章记录了如何遍历进程的所有物理页面。本篇文章在其基础上将物理页面添加到bloom过滤器,加速后续页面查找工作。
一、驱动代码
test.c:
#include <linux/compiler.h>
#include <linux/dcache.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/gpio.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/pid.h>
#include <linux/platform_device.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/rt.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <uapi/linux/sched/types.h>
#include <asm/fcntl.h>
#include <asm/processor.h>
#include "bloom.h"
#define MY_DEV_NAME "my_dev"
#define TEST_PROC_DIR "page_test"
#define TEST_PROC_NAME "pid"
/*-------------------------------------------------------------------------*/
/* /proc directory created by init_test_proc() and removed by proc_test_exit(). */
struct proc_dir_entry *test_proc_dir = NULL;
/*
 * Target process id, set by writing to the proc file. The initial value -1
 * means "not set yet": pid_thread() sleeps until it changes.
 */
static int pid = -1;
/*
 * seq_file show callback for the proc entry.
 *
 * Prints a fixed prompt followed by the currently configured pid, or "off"
 * when the pid is 0.
 */
static int test_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m, "echo pid to start.now:");

	if (pid != 0)
		seq_printf(m, "%d\n", pid);
	else
		seq_printf(m, "off\n");

	/* Original author's note: this must return 0, otherwise nothing is shown. */
	return 0;
}
/*
 * Write handler for the proc entry: parses a decimal pid from user space and
 * stores it in the module-level `pid`.
 *
 * @file:   unused
 * @buffer: user buffer holding the textual pid (e.g. "1234\n")
 * @count:  number of bytes in @buffer
 * @f_pos:  unused
 *
 * Returns @count on success, -EINVAL when the input is empty, too long or
 * does not start with an integer, and -EFAULT when the user buffer cannot be
 * read. `pid` is only updated when parsing succeeds.
 */
static ssize_t test_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *f_pos)
{
	/* Large enough for any 32-bit integer plus sign, newline and NUL. */
	char tmp[16];
	int new_pid;

	/* Bound the copy: the length is controlled by user space. */
	if (count == 0 || count >= sizeof(tmp))
		return -EINVAL;

	if (copy_from_user(tmp, buffer, count))
		return -EFAULT;
	tmp[count] = '\0';

	/* Do not report success for input that contains no number. */
	if (sscanf(tmp, "%d", &new_pid) != 1)
		return -EINVAL;

	pid = new_pid;
	return count;
}
/*
 * Open handler for the proc entry: binds the file to test_proc_show() through
 * single_open(), passing no private data.
 */
static int test_proc_open(struct inode *inode, struct file *file)
{
	int rc;

	rc = single_open(file, test_proc_show, NULL);
	return rc;
}
/*
 * File operations of the proc entry: reads go through the seq_file helpers
 * (backed by test_proc_show), writes are handled by test_proc_write().
 */
static struct file_operations proc_fops = {
	.owner   = THIS_MODULE,
	/* lifecycle */
	.open    = test_proc_open,
	.release = single_release,
	/* data path */
	.read    = seq_read,
	.llseek  = seq_lseek,
	.write   = test_proc_write,
};
/*
 * Create the proc directory TEST_PROC_DIR and, inside it, the TEST_PROC_NAME
 * file served by proc_fops.
 *
 * Returns 0 on success and -EINVAL when either entry cannot be created. On
 * failure nothing is left behind: the directory is removed again and
 * test_proc_dir is reset to NULL.
 */
static int init_test_proc(void)
{
	struct proc_dir_entry *file;

	test_proc_dir = proc_mkdir(TEST_PROC_DIR, NULL);
	if (!test_proc_dir) {
		pr_err("%s Create %s failed\n", __func__, TEST_PROC_DIR);
		return -EINVAL;
	}

	/* The mode must be octal: decimal 666 is 01232, not rw-rw-rw-. */
	file = proc_create(TEST_PROC_NAME, 0666, test_proc_dir, &proc_fops);
	if (!file) {
		pr_err("%s Create %s failed\n", __func__, TEST_PROC_NAME);
		/* Undo the mkdir so a failed init does not leak the directory. */
		proc_remove(test_proc_dir);
		test_proc_dir = NULL;
		return -EINVAL;
	}

	return 0;
}
/* Tear down the proc entries by removing the directory held in test_proc_dir. */
static void proc_test_exit(void)
{
	struct proc_dir_entry *dir = test_proc_dir;

	proc_remove(dir);
}
/*-------------------------------------------------------------------------*/
/*
 * Open handler of the misc device: only logs the major/minor numbers of the
 * opened inode. Always returns 0.
 */
static int test_open(struct inode *inode, struct file *file)
{
	pr_info("%s: major=%d, minor=%d\n", __func__,
		(int)MAJOR(inode->i_rdev), (int)MINOR(inode->i_rdev));

	return 0;
}
/* Release handler of the misc device: logs the call and reports success. */
static int test_release(struct inode *inode, struct file *file)
{
	const int status = 0;

	pr_info("%s \n", __func__);
	return status;
}
/*
 * Read handler of the misc device: logs the call and returns 0 bytes, i.e.
 * every read sees end-of-file.
 */
static ssize_t test_read(struct file *file, char __user *buf, size_t lbuf, loff_t *ppos)
{
	const ssize_t bytes_read = 0;

	pr_info("%s \n", __func__);
	return bytes_read;
}
/*
 * Write handler of the misc device: logs the call and returns 0, i.e. no
 * bytes of the user buffer are consumed.
 */
static ssize_t test_write(struct file *file, const char __user *buf, size_t count, loff_t *f_pos)
{
	const ssize_t bytes_written = 0;

	pr_info("%s \n", __func__);
	return bytes_written;
}
/* File operations of the misc device; every handler is a logging stub. */
static const struct file_operations test_fops = {
	.owner   = THIS_MODULE,
	.read    = test_read,
	.write   = test_write,
	.open    = test_open,
	.release = test_release,
};
/*
 * Misc device descriptor: named MY_DEV_NAME, served by test_fops, with the
 * minor number assigned dynamically at registration.
 */
static struct miscdevice test_misc_device = {
	.name  = MY_DEV_NAME,
	.fops  = &test_fops,
	.minor = MISC_DYNAMIC_MINOR,
};
/* Kernel thread created in test_init(); it runs pid_thread(). */
static struct task_struct *thread_task;
/* Bloom filter that pid_thread() fills with the target process's PFNs. */
static struct bloom bloom;
/*
 * Look up the task_struct that belongs to a numeric pid.
 *
 * @pid: process id to resolve
 *
 * Returns the task, or NULL when no struct pid or no task exists for @pid.
 *
 * The returned pointer carries no reference: only the struct pid is
 * reference-counted here, and that reference is dropped before returning.
 * NOTE(review): callers that keep the task for longer should take their own
 * reference (get_task_struct) - confirm against the call sites.
 */
struct task_struct *get_task_by_pid(pid_t pid)
{
	struct pid *proc_pid;
	struct task_struct *task;

	/* Grab a reference on the struct pid for the duration of the lookup. */
	proc_pid = find_get_pid(pid);
	if (!proc_pid)
		return NULL;

	/* pid_task() must be called under rcu_read_lock() or tasklist_lock. */
	rcu_read_lock();
	task = pid_task(proc_pid, PIDTYPE_PID);
	rcu_read_unlock();

	/* The struct pid is not needed any more, whether or not a task was found. */
	put_pid(proc_pid);

	return task;
}
/*
 * Sum the anonymous, file-backed and shmem page counters of an mm.
 *
 * @mm: address space to inspect
 *
 * Returns the total of the three counters (a number of pages).
 */
static unsigned long get_task_rss(struct mm_struct *mm)
{
	unsigned long total = 0;

	total += get_mm_counter(mm, MM_ANONPAGES);
	total += get_mm_counter(mm, MM_FILEPAGES);
	total += get_mm_counter(mm, MM_SHMEMPAGES);

	return total;
}
/* Number of successful virt2pfn() translations since the module was loaded. */
static unsigned int pfn_count = 0;

/*
 * Translate a user virtual address of @mm into a page frame number by walking
 * the page tables (PGD -> PUD -> PMD -> PTE).
 *
 * @mm:    address space whose page tables are walked
 * @vaddr: virtual address to translate
 *
 * Returns the PFN of the mapped page, or 0 when any level is missing or
 * invalid or the page is not present. Each successful translation increments
 * pfn_count.
 *
 * Huge mappings are not followed: a PMD that is not a regular page-table
 * pointer is rejected by pmd_bad() and reported as "not mapped".
 *
 * NOTE(review): the caller is expected to keep the page tables stable (for
 * example by holding the mm's mmap lock for reading) - confirm at call sites.
 */
unsigned long virt2pfn(struct mm_struct *mm, unsigned long vaddr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep;
	pte_t entry;

	/* Page global directory */
	pgd = pgd_offset(mm, vaddr);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		return 0;

	/* Page upper directory */
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud) || pud_bad(*pud))
		return 0;

	/* Page middle directory; pmd_bad() also filters out huge-page entries. */
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd) || !pmd_present(*pmd) || pmd_bad(*pmd))
		return 0;

	/*
	 * User page tables may live in high memory, so map the PTE page
	 * instead of using pte_offset_kernel(), which is meant for kernel
	 * page tables.
	 */
	ptep = pte_offset_map(pmd, vaddr);
	if (!ptep)
		return 0;
	/* Snapshot the entry so the check and the PFN come from one read. */
	entry = *ptep;
	pte_unmap(ptep);

	if (!pte_present(entry))
		return 0;

	pfn_count++;
	return pte_pfn(entry);
}
static int pid_thread(void *arg)
{
struct task_struct *task;
struct mm_struct *mm;
struct vm_area_struct *vma = 0;
unsigned long vpage = 0;
unsigned long pfn = 0;
unsigned long rss = 0;
unsigned long entries = 0;
unsigned long collisions = 0;
unsigned long check = 0;
while(pid == -1)
{
msleep(1000);
}
task = get_task_by_pid(pid); // 获取当前任务组长指针
if (!task) {
printk("Failed to find the process.\n");
return -1;
}
mm = task->mm; // 获取进程的内存管理信息
if (!mm || !mm->pgd) {
printk("Invalid memory management information or page global directory is not initialized.\n");
return -1;
}
rss = get_task_rss(mm);
entries = 8*rss;
bloom_init(&bloom, entries);
if (mm && mm->mmap){
for (vma = mm->mmap; vma; vma = vma->vm_next){
for (vpage = vma->vm_start; vpage < vma->vm_end; vpage += PAGE_SIZE){
pfn = virt2pfn(mm, vpage);
if(pfn != 0)
{
if(bloom_add(&bloom, (void *)&pfn, 8))
{
collisions++;
}
}
}
}
}
printk("pfn_count %u, entries %lu\n", pfn_count, entries);
if (mm && mm->mmap){
for (vma = mm->mmap; vma; vma = vma->vm_next){
for (vpage = vma->vm_start; vpage < vma->vm_end; vpage += PAGE_SIZE){
pfn = virt2pfn(mm, vpage);
if(pfn != 0)
{
if(bloom_check(&bloom, (void *)&pfn, 8))
{
check++;
}
}
}
}
}
printk("collisions %lu, check %lu, pfn_count %u\n", collisions, check, pfn_count);
bloom_free(&bloom);
return 0;
}
/*
 * Module entry point: registers the misc device, creates the proc interface
 * and starts the worker thread that runs pid_thread().
 *
 * Returns 0 on success or a negative errno. On failure everything that was
 * set up before the failing step is undone, so a failed load leaves nothing
 * registered.
 */
static int __init test_init(void)
{
	int ret;

	pr_info("test_init\n");

	ret = misc_register(&test_misc_device);
	if (ret != 0) {
		pr_err("failed to misc_register\n");
		return ret;
	}
	pr_info("Minor number = %d\n", test_misc_device.minor);

	/* Create the proc interface before the thread: a thread cannot be undone as easily. */
	ret = init_test_proc();
	if (ret != 0)
		goto err_cleanup;

	/* kthread_run() = kthread_create() + wake_up_process(), with error checking. */
	thread_task = kthread_run(pid_thread, NULL, "pid-thread");
	if (IS_ERR(thread_task)) {
		ret = PTR_ERR(thread_task);
		thread_task = NULL;
		pr_err("failed to start pid-thread\n");
		goto err_cleanup;
	}

	return 0;

err_cleanup:
	/* proc_remove() accepts NULL, so this is safe even if no directory exists. */
	proc_test_exit();
	misc_deregister(&test_misc_device);
	return ret;
}
/*
 * Module exit point: unregisters the misc device, then removes the proc
 * entries.
 *
 * NOTE(review): the kernel thread started in test_init() is not stopped or
 * waited for here - confirm it has finished before the module is unloaded.
 */
static void __exit test_exit(void)
{
	pr_info("test_exit\n");

	/* 1. character device */
	misc_deregister(&test_misc_device);

	/* 2. proc interface */
	proc_test_exit();
}
module_init(test_init);
module_exit(test_exit);
MODULE_LICENSE("GPL");
bloom.c:
/*
* Copyright (c) 2012-2016, Jyri J. Virkki
* All rights reserved.
*
* This file is under BSD license. See LICENSE file.
*/
/*
* Refer to bloom.h for documentation on the public interfaces.
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/init.h>
#include <linux/miscdevice.h>
#include <linux/sched.h>
#include <linux/dcache.h>
#include <asm/fcntl.h>
#include <asm/processor.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/platform_device.h>
#include <linux/delay.h>
#include <linux/vmalloc.h>
#include <linux/gpio.h>
#include <linux/sched/rt.h>
#include <uapi/linux/sched/types.h>
#include <linux/pid.h>
#include <linux/delay.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/compiler.h>
#include <linux/mm.h>
#include "bloom.h"
#include "murmurhash2.h"
/*
 * Test bit @x of the bit array @buf and optionally set it.
 *
 * @buf:     bit array, addressed as bytes (bit x lives in byte x / 8)
 * @x:       index of the bit
 * @set_bit: when non-zero, a bit that is currently clear gets set
 *
 * Returns 1 when the bit was already set, 0 when it was clear (whether or
 * not it has now been set).
 */
inline static int test_bit_set_bit(unsigned char * buf,
                                   unsigned int x, int set_bit)
{
	unsigned char *slot = &buf[x >> 3];
	unsigned char current_bits = *slot;      /* single load of the byte */
	unsigned int mask = 1 << (x % 8);

	if (current_bits & mask)
		return 1;

	if (set_bit)
		*slot = current_bits | mask;

	return 0;
}
/*
 * Shared implementation of bloom_check() and bloom_add().
 *
 * Two murmurhash2 values are computed over the element (the second one seeded
 * with the first) and combined as h1 + i * h2 to derive one bit position per
 * hash function.
 *
 * @bloom:  filter to operate on
 * @buffer: element bytes
 * @len:    number of bytes in @buffer
 * @add:    non-zero to set the element's bits, zero for a pure lookup
 *
 * Returns 1 when every probed bit was already set (element present, or a
 * collision), 0 otherwise, and -1 when the filter is not initialized.
 */
static int bloom_check_add(struct bloom * bloom,
                           const void * buffer, int len, int add)
{
	unsigned int h1;
	unsigned int h2;
	unsigned int bit;
	unsigned int round;
	int found = 0;

	if (bloom->ready == 0) {
		printk("bloom at %p not initialized!\n", (void *)bloom);
		return -1;
	}

	h1 = murmurhash2(buffer, len, 0x9747b28c);
	h2 = murmurhash2(buffer, len, h1);

	for (round = 0; round < bloom->hashes; round++) {
		bit = (h1 + round * h2) % bloom->bits;

		if (test_bit_set_bit(bloom->bf, bit, add)) {
			found++;
			continue;
		}

		/* Lookup only: one clear bit proves the element is absent. */
		if (!add)
			return 0;
	}

	/* 1 == element already in (or collision) */
	return (found == bloom->hashes) ? 1 : 0;
}
/*
 * Compatibility wrapper around bloom_init(); @cache_size is ignored.
 *
 * Returns whatever bloom_init() returns.
 */
int bloom_init_size(struct bloom * bloom, int entries, unsigned int cache_size)
{
	int rc;

	rc = bloom_init(bloom, entries);
	return rc;
}
int bloom_init(struct bloom * bloom, int entries)
{
//error rate 0.001
//bpe = -log(error)/ln(2)^2
unsigned long bpe = 8325475924;//rel = bpe / 1000000000
bloom->ready = 0;
if (entries < 1) {
return 1;
}
bloom->entries = entries;
bloom->bits = (int)((entries * bpe)/1000000000);
if (bloom->bits % 8) {
bloom->bytes = (bloom->bits / 8) + 1;
} else {
bloom->bytes = bloom->bits / 8;
}
//bloom->hashes = (int)ceil(0.693147180559945 *bpe); // ln(2)
bloom->hashes = 5;
bloom->bf = (unsigned char *)kzalloc(bloom->bytes, GFP_KERNEL);
if (bloom->bf == NULL) {
return 1;
}
bloom->ready = 1;
return 0;
}
/*
 * Look up an element without modifying the filter.
 *
 * Returns the result of bloom_check_add() in lookup mode: 1 present (or
 * collision), 0 absent, -1 filter not initialized.
 */
int bloom_check(struct bloom * bloom, const void * buffer, int len)
{
	const int lookup_only = 0;

	return bloom_check_add(bloom, buffer, len, lookup_only);
}
/*
 * Insert an element into the filter.
 *
 * Returns the result of bloom_check_add() in insert mode: 1 when all of the
 * element's bits were already set, 0 when it was newly added, -1 when the
 * filter is not initialized.
 */
int bloom_add(struct bloom * bloom, const void * buffer, int len)
{
	const int insert = 1;

	return bloom_check_add(bloom, buffer, len, insert);
}
/*
 * Release the bit array of an initialized filter and mark the filter as not
 * ready. Calling it on a filter that is not ready only clears the flag.
 */
void bloom_free(struct bloom * bloom)
{
	if (bloom->ready != 0)
		kfree(bloom->bf);

	bloom->ready = 0;
}
/*
 * Clear every bit of an initialized filter, keeping its size.
 *
 * Returns 0 on success, 1 when the filter is not initialized.
 */
int bloom_reset(struct bloom * bloom)
{
	if (bloom->ready) {
		memset(bloom->bf, 0, bloom->bytes);
		return 0;
	}

	return 1;
}
bloom.h:
/*
* Copyright (c) 2012-2016, Jyri J. Virkki
* All rights reserved.
*
* This file is under BSD license. See LICENSE file.
*/
#ifndef _BLOOM_H
#define _BLOOM_H
/** ***************************************************************************
* Structure to keep track of one bloom filter. Caller needs to
* allocate this and pass it to the functions below. First call for
* every struct must be to bloom_init().
*
*/
struct bloom
{
// These fields are part of the public interface of this structure.
// Client code may read these values if desired. Client code MUST NOT
// modify any of these.
// entries: expected number of elements, as passed to bloom_init().
unsigned long entries;
// bits: size of the filter in bits, derived from entries in bloom_init().
int bits;
// bytes: size of the bit array in bytes (bits rounded up to whole bytes).
int bytes;
// hashes: number of bit positions probed per element.
int hashes;
// Fields below are private to the implementation. These may go away or
// change incompatibly at any moment. Client code MUST NOT access or rely
// on these.
// bf: the bit array itself, allocated by bloom_init() and released by
// bloom_free().
unsigned char * bf;
// ready: 1 after a successful bloom_init(), 0 otherwise.
int ready;
};
/** ***************************************************************************
* Initialize the bloom filter for use.
*
* The filter is initialized with a bit field and number of hash functions
* according to the computations from the wikipedia entry:
* http://en.wikipedia.org/wiki/Bloom_filter
*
* Optimal number of bits is:
* bits = -(entries * ln(error)) / ln(2)^2
*
* Optimal number of hash functions is:
* hashes = bpe * ln(2)
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* entries - The expected number of entries which will be inserted.
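* error - Probability of collision (as long as entries are not
* exceeded). Not a parameter in this port: bloom_init() uses a
* hard-coded bits-per-entry ratio instead of deriving it from an
* error rate.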
*
* Return:
* -------
* 0 - on success
* 1 - on failure
*
*/
int bloom_init(struct bloom * bloom, int entries);
/** ***************************************************************************
* Deprecated, use bloom_init()
*
*/
int bloom_init_size(struct bloom * bloom, int entries, unsigned int cache_size);
/** ***************************************************************************
* Check if the given element is in the bloom filter. Remember this may
* return a false positive if a collision occurred.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* buffer - Pointer to buffer containing element to check.
* len - Size of 'buffer'.
*
* Return:
* -------
* 0 - element is not present
* 1 - element is present (or false positive due to collision)
* -1 - bloom not initialized
*
*/
int bloom_check(struct bloom * bloom, const void * buffer, int len);
/** ***************************************************************************
* Add the given element to the bloom filter.
* The return code indicates if the element (or a collision) was already in,
* so for the common check+add use case, no need to call check separately.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* buffer - Pointer to buffer containing element to add.
* len - Size of 'buffer'.
*
* Return:
* -------
* 0 - element was not present and was added
* 1 - element (or a collision) had already been added previously
* -1 - bloom not initialized
*
*/
int bloom_add(struct bloom * bloom, const void * buffer, int len);
/** ***************************************************************************
* Deallocate internal storage.
*
* Upon return, the bloom struct is no longer usable. You may call bloom_init
* again on the same struct to reinitialize it again.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
*
* Return: none
*
*/
void bloom_free(struct bloom * bloom);
/** ***************************************************************************
* Erase internal storage.
*
* Erases all elements. Upon return, the bloom struct returns to its initial
* (initialized) state.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
*
* Return:
* 0 - on success
* 1 - on failure
*
*/
int bloom_reset(struct bloom * bloom);
#endif
murmurHash2.c:
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
/*
 * MurmurHash2 (32-bit) over an arbitrary byte buffer.
 *
 * @key:  start of the data to hash; no alignment is required
 * @len:  number of bytes at @key
 * @seed: initial hash state; different seeds give independent hashes
 *
 * Returns the 32-bit hash. Input is consumed in native-endian 4-byte words,
 * so results differ between little- and big-endian machines. The algorithm
 * assumes sizeof(unsigned int) == 4.
 */
unsigned int murmurhash2(const void * key, int len, const unsigned int seed)
{
	// 'm' and 'r' are mixing constants generated offline.
	// They're not really 'magic', they just happen to work well.
	const unsigned int m = 0x5bd1e995;
	const int r = 24;

	// Initialize the hash to a 'random' value
	unsigned int h = seed ^ len;

	const unsigned char * data = (const unsigned char *)key;

	// Mix 4 bytes at a time into the hash
	while (len >= 4) {
		unsigned int k;

		// Copy instead of casting the pointer: the buffer may be
		// misaligned, and the cast would also drop the const qualifier.
		__builtin_memcpy(&k, data, sizeof(k));

		k *= m;
		k ^= k >> r;
		k *= m;

		h *= m;
		h ^= k;

		data += 4;
		len -= 4;
	}

	// Handle the last few bytes of the input array
	switch (len) {
	case 3:
		h ^= (unsigned int)data[2] << 16;
		/* fall through */
	case 2:
		h ^= (unsigned int)data[1] << 8;
		/* fall through */
	case 1:
		h ^= data[0];
		h *= m;
		break;
	default:
		break;
	}

	// Do a few final mixes of the hash to ensure the last few
	// bytes are well-incorporated.
	h ^= h >> 13;
	h *= m;
	h ^= h >> 15;

	return h;
}
murmurhash2.h:
#ifndef _BLOOM_MURMURHASH2
#define _BLOOM_MURMURHASH2
unsigned int murmurhash2(const void * key, int len, const unsigned int seed);
#endif
二、bloom过滤器
bloom过滤器的实现参考libbloom。为了避免在内核中使用源码里的log、ln和浮点运算,将误判率固定为0.001。

被折叠的 条评论
为什么被折叠?



