从 Linux Kernel 2.6.31 版本开始,Linux 内核提供了一个叫 __NR_perf_counter_open(在较新的版本里改名为 __NR_perf_event_open)的系统调用。使用这个系统调用,我们可以像打开文件一样打开一个 Performance Counter,通过设置不同的参数让这个 Performance Counter 统计不同的软件或硬件事件,然后就可以像读文件一样来读取这些事件的统计结果。比如,可以打开一个 Performance Counter 来统计某一个进程的 CPU Cache Miss 次数。关于如何传递参数构造 Performance Counter 来统计不同的事件,可以看这篇日志: http://tblog29.appspot.com/blog/1004
下面是我写的一个小程序,它为每个 CPU 和每个进程各打开一个 Performance Counter,分别统计每个 CPU 上的 Cache miss 和每个进程的 Cache miss(不能统计每个进程在单个 CPU 上的事件,详见上边那篇日志)。本代码参考了 perf 的 stat 部分。运行需要 CAP_SYS_ADMIN 权限。
1 eperf.h
/* * eperf.h * * Created on: Jan 28, 2010 * Author: hchen */ #ifndef EPERF_H_ #define EPERF_H_ #include <time.h> #include <asm/unistd.h> #include "perf_event.h" #define MAX_COUNTERS 256 #define MAX_NR_CPUS 32 #define PROC "/proc" /* * We define u64 as unsigned long long for every architecture * so that we can print it with %Lx without getting warnings. */ typedef unsigned long long u64; typedef signed long long s64; typedef unsigned int u32; typedef signed int s32; typedef unsigned short u16; typedef signed short s16; typedef unsigned char u8; typedef signed char s8; static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) { attr->size = sizeof(*attr); //This system call is defined in asm/unistd.h, in the latest linux kernel //it's name has been changed to __NR_perf_event_open . return syscall(__NR_perf_counter_open, attr, pid, cpu, group_fd, flags); } #endif /* EPERF_H_ */
2 eperf.c
#include <time.h> #include <unistd.h> #include <stdlib.h> #include <stdio.h> #include <asm/unistd.h> #include <dirent.h> #include "eperf.h" unsigned int verbose = 0; //event to be countered static struct perf_event_attr attrs[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES } }; int nr_counters = 0; static unsigned int nr_cpus = 0; // amount of cpus static int inherit = 1; static int scale = 1; //used to save performance counter static int fd[MAX_COUNTERS]; /* * Read out the results of a single counter: */ static void read_counter(int counter) { u64 single_count[3]; size_t res, nv; if (fd[counter] <= 0) return; nv = scale ? 3 : 1; res = read(fd[counter], single_count, nv * sizeof(u64)); if(res == nv * sizeof(u64)){ if(verbose) printf("Counter %d: %llu\n", counter, single_count[0]); }else{ fprintf(stderr, "Fail to read counter %d\n", counter); } } void close_all_counters(){ int counter, tn; tn = nr_cpus + nr_counters; for (counter = 0; counter < tn; counter++){ if (fd[counter] <= 0) continue; close(fd[counter]); fd[counter] = -1; } } void run_perf_stat() { int counter, tn; tn = nr_cpus + nr_counters; for (counter = 0; counter < tn; counter++) read_counter(counter); } static void create_perf_stat_counter(int counter, int pid, int system_wide) { struct perf_event_attr attr; //cache miss memcpy(&attr, attrs, sizeof(struct perf_event_attr)); if (scale) attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; if (system_wide) { unsigned int cpu; for (cpu = 0; cpu < nr_cpus; cpu++) { fd[cpu] = sys_perf_event_open(&attr, -1, cpu, -1, 0); } } else { attr.inherit = inherit; attr.disabled = 0; attr.enable_on_exec = 1; fd[counter + nr_cpus] = sys_perf_event_open(&attr, pid, -1, -1, 0); } } int main(int argc, const char **argv) { if(argc > 1) verbose = atoi(argv[1]); DIR *dir; struct dirent *drp; int run_count, p, pid; struct timespec tim, tim2; tim.tv_sec = 1; tim.tv_nsec = 0; nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);//the the 
number of CPU int counter = 0; /* Open /proc directory */ if ((dir = opendir(PROC)) == NULL) { perror("opendir /proc error!"); exit(4); } //create counters for each CPU create_perf_stat_counter(-1, 1, 1); p = 0; while ((p++) < 254) { /* Get directory entries */ while ((drp = readdir(dir)) != NULL) { if (isdigit(drp->d_name[0])) break; } if (drp) { pid = atoi(drp->d_name); create_perf_stat_counter(counter, pid, 0); if(fd[counter] != -1) counter++; } } nr_counters = counter - 1; /* Close /proc directory */ closedir(dir); run_count = 100; // for (run_idx = 0; run_idx < run_count; run_idx++) { while (1) { nanosleep(&tim , &tim2); run_perf_stat(); } close_all_counters(); return 1; }